# Setup ----
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(tibble)
library(stringr)
library(ggplot2)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
##
## nasa
library(mirt)
## Loading required package: stats4
## Loading required package: lattice
library(brms)
## Loading required package: Rcpp
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
## Loading 'brms' package (version 2.10.6). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
##
## Attaching package: 'brms'
## The following object is masked from 'package:mirt':
##
## fixef
# MCMC sampler settings shared by all brm() calls below.
cores <- 4
iter <- 3000
warmup <- 1000
# Raised adapt_delta to reduce divergent transitions.
control <- list(adapt_delta = 0.95)
options(width = 160)
# Default bayesplot/brms ggplot2 theme for all plots.
theme_set(theme_default())
scale2 <- function(x, na.rm = TRUE) {
  # Standardize x to mean 0 and SD 1 (z-scores).
  centered <- x - mean(x, na.rm = na.rm)
  centered / sd(x, na.rm = na.rm)
}
center <- function(x, na.rm = TRUE) {
  # Center x at its mean (mean 0, SD unchanged).
  mu <- mean(x, na.rm = na.rm)
  x - mu
}
r2z <- function(r) {
  # Fisher r-to-z transformation (atanh written out explicitly).
  0.5 * log((1 + r) / (1 - r))
}
z2r <- function(z) {
  # Inverse Fisher transformation (tanh written out explicitly).
  e2z <- exp(2 * z)
  (e2z - 1) / (e2z + 1)
}
# Shorthand alias for suppressWarnings().
SW <- suppressWarnings
sum_coding <- function(x, lvls = levels(x)) {
  # Sum (deviation) coding: the first level is coded -1 in every contrast
  # column; the remaining levels each get their own indicator column.
  nlvls <- length(lvls)
  stopifnot(nlvls > 1)
  # Start from the identity matrix without its last column, then turn the
  # last row into the -1 row.
  cont <- diag(nlvls)[, -nlvls, drop = FALSE]
  cont[nlvls, ] <- -1
  # Rotate the -1 row to the front so it belongs to the first level.
  row_order <- c(nlvls, seq_len(nlvls - 1))
  cont <- cont[row_order, , drop = FALSE]
  colnames(cont) <- lvls[-1]
  out <- factor(x, levels = lvls)
  contrasts(out) <- cont
  out
}
dummy_coding <- function(x, lvls = levels(x)) {
  # Treatment (dummy) coding with the first level as the reference category.
  out <- factor(x, levels = lvls)
  contrasts(out) <- contr.treatment(levels(out))
  out
}
Sum <- function(df, vars, na.rm = TRUE) {
  # Row-wise sum of the variables `vars` (tidyselect expression) in `df`.
  # Rows where ALL selected variables are NA return NA rather than the 0
  # that rowSums(na.rm = TRUE) alone would produce.
  vars <- enquo(vars)
  df <- select(df, !!vars)
  all_na <- apply(df, 1, function(x) all(is.na(x)))
  # `df` is already restricted to `vars`; no need to select() again.
  out <- rowSums(df, na.rm = na.rm)
  ifelse(all_na, NA, out)
}
Mean <- function(df, vars, na.rm = TRUE) {
  # Row-wise mean of the variables `vars` (tidyselect expression) in `df`.
  # Rows where ALL selected variables are NA return NA rather than the NaN
  # that rowMeans(na.rm = TRUE) alone would produce.
  vars <- enquo(vars)
  df <- select(df, !!vars)
  all_na <- apply(df, 1, function(x) all(is.na(x)))
  # `df` is already restricted to `vars`; no need to select() again.
  out <- rowMeans(df, na.rm = na.rm)
  ifelse(all_na, NA, out)
}
ll <- function(y, p) {
  # Bernoulli log-likelihood of observation y given success probability p.
  hit <- y * log(p)
  miss <- (1 - y) * log(1 - p)
  hit + miss
}
fit_statistic <- function(model, criterion, group, nsamples = NULL) {
  # Posterior-predictive fit statistic for a brms model, summed per draw
  # within each level of `group` (unquoted column name, e.g. person or item).
  # Returns crit (observed), crit_rep (replicated) and crit_diff per group
  # level and posterior draw. NOTE(review): the observed response column is
  # hard-coded as `response2` — confirm for any new model.
  group <- enquo(group)
  subset <- NULL
  if (!is.null(nsamples)) {
    # In call position `nsamples(model)` resolves to brms::nsamples() (R
    # skips non-function bindings when looking up a function), while the
    # bare `nsamples` is the numeric argument.
    subset <- sample(seq_len(nsamples(model)), nsamples)
  }
  # Expected predictions per observation and draw, in long format.
  ppe <- pp_expect(model, subset = subset) %>%
    t() %>%
    as.data.frame() %>%
    cbind(model$data) %>%
    gather("draw", "ppe", starts_with("V"))
  # Posterior-predictive replications; use model$data (was the global
  # spm_long, which silently broke for models fit on other data).
  yrep <- posterior_predict(model, subset = subset) %>%
    t() %>%
    as.data.frame() %>%
    cbind(model$data) %>%
    gather("draw", "yrep", starts_with("V"))
  ppe %>%
    mutate(yrep = yrep$yrep) %>%
    mutate(
      crit = criterion(response2, ppe),
      crit_rep = criterion(yrep, ppe)
    ) %>%
    group_by(!!group, draw) %>%
    summarise(
      crit = sum(crit),
      crit_rep = sum(crit_rep),
      crit_diff = crit_rep - crit
    ) %>%
    # Draw labels come from gather() as "V1", "V2", ...; recover the number.
    mutate(draw = as.numeric(sub("^V", "", draw))) %>%
    arrange(!!group, draw)
}
theme_hist <- function(...) {
  # Histogram theme: the default bayesplot theme with the entire y axis
  # (text, ticks, title) hidden. Extra theme settings pass through `...`.
  blank_y <- theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank(),
    ...
  )
  bayesplot::theme_default() + blank_y
}
# 1PL Models ----
# MCMC with hierarchical item priors ----
# 1PL (Rasch) IRT model with hierarchical (partially pooled) priors:
# a common intercept plus item and person random intercepts.
formula_1pl <- bf(
formula = response2 ~ 1 + (1 | item) + (1 | person),
family = brmsfamily("bernoulli", link = "logit")
)
prior_1pl <-
prior("normal(0, 2)", class = "Intercept") +
prior("normal(0, 3)", class = "sd", group = "person") +
prior("normal(0, 3)", class = "sd", group = "item")
# `file =` caches the fitted model; delete the .rds to refit.
fit_1pl <- brm(
formula = formula_1pl,
data = spm_long,
prior = prior_1pl,
chains = cores, iter = iter, warmup = warmup,
cores = cores, control = control,
file = "models/fit_1pl"
)
fit_1pl <- add_loo(fit_1pl)
summary(fit_1pl)
## Family: bernoulli
## Links: mu = logit
## Formula: response2 ~ 1 + (1 | item) + (1 | person)
## Data: spm_long (Number of observations: 5988)
## Samples: 4 chains, each with iter = 3000; warmup = 1000; thin = 1;
## total post-warmup samples = 8000
##
## Group-Level Effects:
## ~item (Number of levels: 12)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.59 0.39 1.03 2.57 1.00 1408 2465
##
## ~person (Number of levels: 499)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.69 0.08 1.54 1.85 1.00 2348 3996
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 1.04 0.46 0.14 1.95 1.01 798 1571
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(fit_1pl)

# Person parameters: posterior draws of the person intercepts, standardized
# within each draw by that draw's SD so scales are comparable across models.
person_pars_1pl <-
ranef(fit_1pl, summary = FALSE)$person[, , "Intercept"]
person_sds_1pl <- apply(person_pars_1pl, 1, sd)
person_pars_1pl <- person_pars_1pl %>%
sweep(1, person_sds_1pl, "/") %>%
posterior_summary() %>%
as_tibble() %>%
# NOTE(review): as_tibble() drops rownames, so rownames_to_column() yields
# sequential "1", "2", ... — correct only because persons are numbered 1..n.
rownames_to_column(var = "person") %>%
mutate(person = as.numeric(person))
saveRDS(person_pars_1pl, "results/person_pars_1pl")
# Caterpillar plot of person estimates with 95% intervals.
person_pars_1pl %>%
arrange(Estimate) %>%
mutate(id2 = seq_len(n())) %>%
ggplot(aes(id2, Estimate, ymin = Q2.5, ymax = Q97.5)) +
geom_pointrange(alpha = 0.7) +
coord_flip() +
labs(x = "Person Number (sorted after Estimate)") +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
)

# Posterior-predictive person fit: per person, the share of draws where the
# replicated log-likelihood exceeds the observed one (Bayesian p-value).
person_fit_1pl <- fit_statistic(
fit_1pl, criterion = ll, group = person,
nsamples = 1000
)
person_diff_1pl <- person_fit_1pl %>%
group_by(person) %>%
summarise(bp = mean(crit_diff > 0))
person_max_1pl <- which.max(person_diff_1pl$bp)
# Histogram of fit differences for the worst-fitting person.
person_fit_1pl %>%
filter(person == person_max_1pl) %>%
ggplot(aes(crit_diff)) +
geom_histogram() +
theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Item parameters (easiness = item intercept) and item-fit histograms.
item_pars_1pl <- coef(fit_1pl, summary = FALSE)$item[, , "Intercept"] %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column() %>%
rename(item = "rowname") %>%
mutate(
item = as.numeric(item),
method = "MCMC-H"
)
saveRDS(item_pars_1pl, "results/item_pars_1pl")
item_fit_1pl <- fit_statistic(
fit_1pl, criterion = ll, group = item,
nsamples = 1000
)
item_fit_1pl %>%
ggplot(aes(crit_diff)) +
geom_histogram() +
facet_wrap("item", scales = "free") +
theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# MCMC with non-hierarchical item priors ----
# 1PL model with non-hierarchical item priors: one fixed-effect easiness
# coefficient per item (`0 + item`) instead of item random intercepts.
formula_1pl_nh <- bf(
formula = response2 ~ 0 + item + (1 | person),
family = brmsfamily("bernoulli", link = "logit")
)
prior_1pl_nh <-
prior("normal(0, 5)", class = "b") +
prior("normal(0, 3)", class = "sd", group = "person")
fit_1pl_nh <- brm(
formula = formula_1pl_nh,
data = spm_long,
prior = prior_1pl_nh,
chains = cores, iter = iter, warmup = warmup,
cores = cores, control = control,
file = "models/fit_1pl_nh"
)
fit_1pl_nh <- add_loo(fit_1pl_nh)
## Automatically saving the model object in 'models/fit_1pl_nh.rds'
summary(fit_1pl_nh)
## Family: bernoulli
## Links: mu = logit
## Formula: response2 ~ 0 + item + (1 | person)
## Data: spm_long (Number of observations: 5988)
## Samples: 4 chains, each with iter = 3000; warmup = 1000; thin = 1;
## total post-warmup samples = 8000
##
## Group-Level Effects:
## ~person (Number of levels: 499)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.69 0.08 1.54 1.86 1.00 2990 4898
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## item1 1.69 0.15 1.41 1.99 1.00 4299 5199
## item2 3.27 0.20 2.88 3.67 1.00 6219 5395
## item3 2.06 0.16 1.75 2.37 1.00 4442 5173
## item4 2.24 0.16 1.93 2.56 1.00 4361 5521
## item5 2.57 0.17 2.23 2.91 1.00 4861 5432
## item6 1.72 0.15 1.44 2.02 1.00 4571 5862
## item7 1.26 0.14 0.98 1.55 1.00 3585 4921
## item8 0.49 0.14 0.22 0.77 1.00 4006 5248
## item9 0.44 0.14 0.17 0.71 1.00 3821 4842
## item10 -0.64 0.14 -0.91 -0.37 1.00 4154 5024
## item11 -0.88 0.14 -1.16 -0.60 1.00 4131 5199
## item12 -1.09 0.14 -1.37 -0.81 1.00 4407 5029
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(fit_1pl_nh)



# Person parameters, standardized per draw (same procedure as for fit_1pl).
person_pars_1pl_nh <-
ranef(fit_1pl_nh, summary = FALSE)$person[, , "Intercept"]
person_sds_1pl_nh <- apply(person_pars_1pl_nh, 1, sd)
person_pars_1pl_nh <- person_pars_1pl_nh %>%
sweep(1, person_sds_1pl_nh, "/") %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column(var = "person") %>%
mutate(person = as.numeric(person))
saveRDS(person_pars_1pl_nh, "results/person_pars_1pl_nh")
person_pars_1pl_nh %>%
arrange(Estimate) %>%
mutate(id2 = seq_len(n())) %>%
ggplot(aes(id2, Estimate, ymin = Q2.5, ymax = Q97.5)) +
geom_pointrange(alpha = 0.7) +
coord_flip() +
labs(x = "Person Number (sorted after Estimate)") +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
)

# Person-fit statistics for the non-hierarchical 1PL model.
person_fit_1pl_nh <- fit_statistic(
fit_1pl_nh, criterion = ll, group = person,
nsamples = 1000
)
person_diff_1pl_nh <- person_fit_1pl_nh %>%
group_by(person) %>%
summarise(bp = mean(crit_diff > 0))
person_max_1pl_nh <- which.max(person_diff_1pl_nh$bp)
person_fit_1pl_nh %>%
filter(person == person_max_1pl_nh) %>%
ggplot(aes(crit_diff)) +
geom_histogram() +
theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Item easiness parameters from the fixed effects; `item - 0.1` is a small
# horizontal offset so methods do not overlap in the joint item plot.
item_pars_1pl_nh <- brms::fixef(fit_1pl_nh, summary = FALSE) %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column() %>%
rename(item = "rowname") %>%
mutate(
item = as.numeric(item),
item = item - 0.1,
method = "MCMC-NH"
)
saveRDS(item_pars_1pl_nh, "results/item_pars_1pl_nh")
item_fit_1pl_nh <- fit_statistic(
fit_1pl_nh, criterion = ll, group = item,
nsamples = 1000
)
item_fit_1pl_nh %>%
ggplot(aes(crit_diff)) +
geom_histogram() +
facet_wrap("item", scales = "free") +
theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# MAP ----
# MAP estimation with mirt: normal(0, 5) priors on the item easiness
# parameters (indices 2, 6, 10, ... in mirt's internal parameter table).
parprior_1pl_map <- list(c(seq(2, 46, 4), "norm", 0, 5))
fit_1pl_map <- mirt(
spm_wide, model = 1,
itemtype = "Rasch", SE = TRUE,
technical = list(NCYCLES = 10000),
parprior = parprior_1pl_map
)
person_pars_1pl_map <- fscores(fit_1pl_map, full.scores.SE = TRUE) %>%
as.data.frame() %>%
rename(Estimate = "F1", Est.Error = "SE_F1") %>%
mutate(
# person parameters are not scaled appropriately by mirt for the 1PL model
scale = sd(Estimate) / sd(person_pars_1pl$Estimate),
Estimate = Estimate / scale,
Est.Error = Est.Error / scale,
Q2.5 = Estimate - 1.96 * Est.Error,
Q97.5 = Estimate + 1.96 * Est.Error,
person = seq_len(n())
) %>%
select(-scale)
saveRDS(person_pars_1pl_map, "results/person_pars_1pl_map")
# Keep only the easiness parameters (rownames ending ".d"); the item number
# is parsed from the "X<item>" prefix. `item - 0.2` offsets the joint plot.
item_pars_1pl_map <- coef(fit_1pl_map, as.data.frame = TRUE) %>%
as.data.frame() %>%
rownames_to_column() %>%
filter(str_detect(rowname, "\\.d$")) %>%
mutate(
item = str_match(rowname, "^X[[:digit:]]+") %>%
str_remove("^X") %>%
as.numeric(),
item = item - 0.2,
method = "MAP"
) %>%
rename(Estimate = par, Q2.5 = CI_2.5, Q97.5 = CI_97.5)
saveRDS(item_pars_1pl_map, "results/item_pars_1pl_map")
# ML ----
# Maximum-likelihood estimation with mirt (no priors on item parameters).
fit_1pl_ml <- mirt(spm_wide, model = 1, itemtype = "Rasch", SE = TRUE,
technical = list(NCYCLES = 10000))
person_pars_1pl_ml <- fscores(fit_1pl_ml, full.scores.SE = TRUE) %>%
as.data.frame() %>%
rename(Estimate = "F1", Est.Error = "SE_F1") %>%
mutate(
# person parameters are not scaled appropriately by mirt for the 1PL model
scale = sd(Estimate) / sd(person_pars_1pl$Estimate),
Estimate = Estimate / scale,
Est.Error = Est.Error / scale,
Q2.5 = Estimate - 1.96 * Est.Error,
Q97.5 = Estimate + 1.96 * Est.Error,
person = seq_len(n())
) %>%
select(-scale)
saveRDS(person_pars_1pl_ml, "results/person_pars_1pl_ml")
# Item easiness parameters; `item - 0.3` offsets the joint plot.
item_pars_1pl_ml <- coef(fit_1pl_ml, as.data.frame = TRUE) %>%
as.data.frame() %>%
rownames_to_column() %>%
filter(str_detect(rowname, "\\.d$")) %>%
mutate(
item = str_match(rowname, "^X[[:digit:]]+") %>%
str_remove("^X") %>%
as.numeric(),
item = item - 0.3,
method = "ML"
) %>%
rename(Estimate = par, Q2.5 = CI_2.5, Q97.5 = CI_97.5)
saveRDS(item_pars_1pl_ml, "results/item_pars_1pl_ml")
# Joint plots: Person parameters ----
# plot comparison of person parameters
# NOTE(review): relies on bind_cols() repairing duplicate names to
# Estimate1, Q2.51, Q97.51; newer dplyr versions use "...<pos>" suffixes —
# verify against the installed dplyr. UIW = uncertainty interval width.
person_pars_1pl_all <- person_pars_1pl %>%
bind_cols(person_pars_1pl_ml) %>%
mutate(
UIW = Q97.5 - Q2.5,
UIW1 = Q97.51 - Q2.51
)
person_pars_1pl_all %>%
ggplot(aes(Estimate, Estimate1, color = UIW)) +
geom_point() +
geom_abline() +
scale_color_viridis_c() +
lims(x = c(-3, 2), y = c(-3, 2)) +
labs(
x = "Estimate (MCMC-H)",
y = "Estimate (ML)",
color = "UIW (MCMC-H)"
)

# Interval-width comparison; the axis limits clip extreme points, which
# triggers the "Removed ... rows" warning below.
person_pars_1pl_all %>%
ggplot(aes(UIW, UIW1, color = Estimate)) +
geom_point() +
geom_abline() +
scale_color_viridis_c() +
lims(x = c(1, 2.4), y = c(1, 2.4)) +
labs(
x = "UIW (MCMC-H)",
y = "UIW (ML)",
color = "Estimate (MCMC-H)"
)
## Warning: Removed 18 rows containing missing values (geom_point).

# Joint plots: Item parameters ----
# Item easiness estimates of all four estimation methods in one plot
# (the fractional item offsets added above dodge the point ranges).
item_pars_1pl %>%
bind_rows(
item_pars_1pl_nh,
item_pars_1pl_map,
item_pars_1pl_ml
) %>%
mutate(
method = factor(method, levels = c("MCMC-H", "MCMC-NH", "MAP", "ML"))
) %>%
ggplot(aes(item, Estimate, ymin = Q2.5, ymax = Q97.5, color = method)) +
scale_x_continuous(breaks = 1:12) +
geom_pointrange() +
scale_color_viridis_d(name = "Method") +
coord_flip() +
labs(x = "Item Number")

# 2PL Models ----
# MCMC with hierarchical item priors ----
# 2PL model as a non-linear brms formula: easiness (beta) plus
# item-specific discrimination exp(logalpha); `|i|` correlates the two item
# random effects. The person SD is fixed via theta's prior for
# identification.
formula_2pl <- bf(
response2 ~ beta + exp(logalpha) * theta,
nl = TRUE,
theta ~ 0 + (1 | person),
beta ~ 1 + (1 |i| item),
logalpha ~ 1 + (1 |i| item),
family = brmsfamily("bernoulli", link = "logit")
)
prior_2pl <-
prior("normal(0, 2)", class = "b", nlpar = "beta") +
prior("normal(0, 1)", class = "b", nlpar = "logalpha") +
prior("normal(0, 1)", class = "sd", group = "person", nlpar = "theta") +
prior("normal(0, 3)", class = "sd", group = "item", nlpar = "beta") +
prior("normal(0, 1)", class = "sd", group = "item", nlpar = "logalpha")
fit_2pl <- brm(
formula = formula_2pl,
data = spm_long,
prior = prior_2pl,
chains = cores, iter = iter, warmup = warmup,
cores = cores, control = control, inits = 0,
file = "models/fit_2pl"
)
fit_2pl <- add_loo(fit_2pl)
## Warning: Found 3 observations with a pareto_k > 0.7 in model 'fit_2pl'. It is recommended to set 'reloo = TRUE' in order to calculate the ELPD without the
## assumption that these observations are negligible. This will refit the model 3 times to compute the ELPDs for the problematic observations directly.
summary(fit_2pl)
## Family: bernoulli
## Links: mu = logit
## Formula: response2 ~ beta + exp(logalpha) * theta
## theta ~ 0 + (1 | person)
## beta ~ 1 + (1 | i | item)
## logalpha ~ 1 + (1 | i | item)
## Data: spm_long (Number of observations: 5988)
## Samples: 4 chains, each with iter = 3000; warmup = 1000; thin = 1;
## total post-warmup samples = 8000
##
## Group-Level Effects:
## ~person (Number of levels: 499)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(theta_Intercept) 1.13 0.56 0.28 2.40 1.00 4300 3982
##
## ~item (Number of levels: 12)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(beta_Intercept) 2.11 0.51 1.36 3.31 1.00 1707 3506
## sd(logalpha_Intercept) 0.51 0.13 0.31 0.82 1.00 2109 3538
## cor(beta_Intercept,logalpha_Intercept) 0.62 0.20 0.13 0.90 1.00 2196 3868
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## beta_Intercept 1.29 0.61 -0.01 2.45 1.00 985 1835
## logalpha_Intercept 0.58 0.57 -0.35 1.88 1.00 3808 3752
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(fit_2pl)


# Person parameters (theta), standardized per draw by the draw's SD.
person_pars_2pl <-
ranef(fit_2pl, summary = FALSE)$person[, , "theta_Intercept"]
person_sds_2pl <- apply(person_pars_2pl, 1, sd)
person_pars_2pl <- person_pars_2pl %>%
sweep(1, person_sds_2pl, "/") %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column(var = "person") %>%
mutate(person = as.numeric(person))
saveRDS(person_pars_2pl, "results/person_pars_2pl")
person_pars_2pl %>%
arrange(Estimate) %>%
mutate(id2 = seq_len(n())) %>%
ggplot(aes(id2, Estimate, ymin = Q2.5, ymax = Q97.5)) +
geom_pointrange(alpha = 0.7) +
coord_flip() +
labs(x = "Person Number (sorted after Estimate)") +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
)

# Person-fit statistics for the hierarchical 2PL model.
person_fit_2pl <- fit_statistic(
fit_2pl, criterion = ll, group = person,
nsamples = 1000
)
person_diff_2pl <- person_fit_2pl %>%
group_by(person) %>%
summarise(bp = mean(crit_diff > 0))
person_max_2pl <- which.max(person_diff_2pl$bp)
person_fit_2pl %>%
filter(person == person_max_2pl) %>%
ggplot(aes(crit_diff)) +
geom_histogram() +
theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Item parameters: easiness (beta) and discrimination. Discriminations are
# exp(logalpha) rescaled by the per-draw person SD so they refer to the
# standardized person scale used above.
item_pars_2pl <- coef(fit_2pl, summary = FALSE)$item
# locations
beta <- item_pars_2pl[, , "beta_Intercept"] %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column()
# slopes
alpha <- item_pars_2pl[, , "logalpha_Intercept"] %>%
exp() %>%
sweep(1, person_sds_2pl, "*") %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column()
item_pars_2pl <- bind_rows(beta, alpha, .id = "nlpar") %>%
rename(item = "rowname") %>%
mutate(item = as.numeric(item)) %>%
mutate(
nlpar = factor(nlpar, labels = c("Location", "Slope")),
method = "MCMC-H"
)
saveRDS(item_pars_2pl, "results/item_pars_2pl")
item_fit_2pl <- fit_statistic(
fit_2pl, criterion = ll, group = item,
nsamples = 1000
)
item_fit_2pl %>%
ggplot(aes(crit_diff)) +
geom_histogram() +
facet_wrap("item", scales = "free") +
theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (stat_bin).

# MCMC with non-hierarchical item priors ----
# 2PL model with non-hierarchical item priors: per-item fixed effects for
# both easiness and log-discrimination.
formula_2pl_nh <- bf(
response2 ~ beta + exp(logalpha) * theta,
nl = TRUE,
theta ~ 0 + (1 | person),
beta ~ 0 + item,
logalpha ~ 0 + item,
family = brmsfamily("bernoulli", link = "logit")
)
prior_2pl_nh <-
prior("normal(0, 5)", class = "b", nlpar = "beta") +
prior("normal(0, 1)", class = "b", nlpar = "logalpha") +
prior("normal(0, 1)", class = "sd", group = "person", nlpar = "theta")
fit_2pl_nh <- brm(
formula = formula_2pl_nh,
data = spm_long,
prior = prior_2pl_nh,
chains = cores, iter = iter, warmup = warmup,
cores = cores, control = control, inits = 0,
file = "models/fit_2pl_nh"
)
fit_2pl_nh <- add_loo(fit_2pl_nh)
## Warning: Found 22 observations with a pareto_k > 0.7 in model 'fit_2pl_nh'. With this many problematic observations, it may be more appropriate to use 'kfold'
## with argument 'K = 10' to perform 10-fold cross-validation rather than LOO.
## Automatically saving the model object in 'models/fit_2pl_nh.rds'
summary(fit_2pl_nh)
## Family: bernoulli
## Links: mu = logit
## Formula: response2 ~ beta + exp(logalpha) * theta
## theta ~ 0 + (1 | person)
## beta ~ 0 + item
## logalpha ~ 0 + item
## Data: spm_long (Number of observations: 5988)
## Samples: 4 chains, each with iter = 3000; warmup = 1000; thin = 1;
## total post-warmup samples = 8000
##
## Group-Level Effects:
## ~person (Number of levels: 499)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(theta_Intercept) 1.34 0.51 0.53 2.50 1.00 729 1305
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## beta_item1 1.32 0.13 1.08 1.59 1.00 8455 5996
## beta_item2 3.55 0.38 2.88 4.35 1.00 6795 5525
## beta_item3 2.06 0.21 1.68 2.50 1.00 5005 6057
## beta_item4 4.17 0.69 3.05 5.76 1.00 3712 4043
## beta_item5 5.54 1.05 3.94 8.03 1.00 3081 3765
## beta_item6 2.12 0.25 1.66 2.66 1.00 4567 5118
## beta_item7 1.22 0.16 0.93 1.55 1.00 4786 5926
## beta_item8 0.49 0.14 0.24 0.76 1.00 3790 5155
## beta_item9 0.40 0.12 0.16 0.64 1.00 4432 4778
## beta_item10 -0.72 0.17 -1.06 -0.40 1.00 3173 4969
## beta_item11 -0.83 0.14 -1.11 -0.56 1.00 4561 5589
## beta_item12 -0.92 0.12 -1.17 -0.68 1.00 5219 5212
## logalpha_item1 -0.41 0.43 -1.19 0.50 1.00 837 1465
## logalpha_item2 0.46 0.42 -0.32 1.34 1.00 799 1618
## logalpha_item3 0.30 0.41 -0.45 1.16 1.00 781 1590
## logalpha_item4 1.20 0.43 0.42 2.11 1.00 833 1577
## logalpha_item5 1.37 0.44 0.58 2.29 1.00 822 1686
## logalpha_item6 0.64 0.42 -0.11 1.52 1.00 788 1610
## logalpha_item7 0.21 0.42 -0.53 1.09 1.00 755 1430
## logalpha_item8 0.25 0.41 -0.48 1.12 1.00 746 1335
## logalpha_item9 0.01 0.42 -0.74 0.89 1.00 796 1262
## logalpha_item10 0.57 0.42 -0.17 1.46 1.00 764 1434
## logalpha_item11 0.19 0.41 -0.55 1.06 1.00 757 1443
## logalpha_item12 -0.10 0.42 -0.84 0.79 1.00 785 1422
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(fit_2pl_nh)





# Person parameters (theta), standardized per draw (as for fit_2pl).
person_pars_2pl_nh <-
ranef(fit_2pl_nh, summary = FALSE)$person[, , "theta_Intercept"]
person_sds_2pl_nh <- apply(person_pars_2pl_nh, 1, sd)
person_pars_2pl_nh <- person_pars_2pl_nh %>%
sweep(1, person_sds_2pl_nh, "/") %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column(var = "person") %>%
mutate(person = as.numeric(person))
saveRDS(person_pars_2pl_nh, "results/person_pars_2pl_nh")
person_pars_2pl_nh %>%
arrange(Estimate) %>%
mutate(id2 = seq_len(n())) %>%
ggplot(aes(id2, Estimate, ymin = Q2.5, ymax = Q97.5)) +
geom_pointrange(alpha = 0.7) +
coord_flip() +
labs(x = "Person Number (sorted after Estimate)") +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
)

# Person-fit statistics for the non-hierarchical 2PL model.
person_fit_2pl_nh <- fit_statistic(
fit_2pl_nh, criterion = ll, group = person,
nsamples = 1000
)
person_diff_2pl_nh <- person_fit_2pl_nh %>%
group_by(person) %>%
summarise(bp = mean(crit_diff > 0))
person_max_2pl_nh <- which.max(person_diff_2pl_nh$bp)
person_fit_2pl_nh %>%
filter(person == person_max_2pl_nh) %>%
ggplot(aes(crit_diff)) +
geom_histogram() +
theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# extract item parameters
item_pars_2pl_nh <- brms::fixef(fit_2pl_nh, summary = FALSE)
# plot item parameters
# locations
sel_beta <- grepl("^beta", colnames(item_pars_2pl_nh))
beta <- item_pars_2pl_nh[, sel_beta] %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column()
# slopes: exp(logalpha) rescaled by the per-draw person SD to match the
# standardized person scale.
sel_alpha <- grepl("^logalpha", colnames(item_pars_2pl_nh))
alpha <- item_pars_2pl_nh[, sel_alpha] %>%
exp() %>%
sweep(1, person_sds_2pl_nh, "*") %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column()
# `item - 0.1` offsets this method's points in the joint item plot.
item_pars_2pl_nh <- bind_rows(beta, alpha, .id = "nlpar") %>%
rename(item = "rowname") %>%
mutate(
item = as.numeric(item),
item = item - 0.1,
nlpar = factor(nlpar, labels = c("Location", "Slope")),
method = "MCMC-NH"
)
saveRDS(item_pars_2pl_nh, "results/item_pars_2pl_nh")
item_fit_2pl_nh <- fit_statistic(
fit_2pl_nh, criterion = ll, group = item,
nsamples = 1000
)
item_fit_2pl_nh %>%
ggplot(aes(crit_diff)) +
geom_histogram() +
facet_wrap("item", scales = "free") +
theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 12 rows containing non-finite values (stat_bin).

# MAP ----
# MAP estimation with mirt: lognormal priors on the discriminations and
# normal priors on the easiness parameters.
parprior_2pl_map <- list(
c(seq(1, 45, 4), "lnorm", 0, 2),
c(seq(2, 46, 4), "norm", 0, 5)
)
fit_2pl_map <- mirt(
spm_wide, model = 1,
itemtype = "2PL", SE = TRUE,
technical = list(NCYCLES = 10000),
parprior = parprior_2pl_map
)
person_pars_2pl_map <- fscores(fit_2pl_map, full.scores.SE = TRUE) %>%
as.data.frame() %>%
rename(Estimate = "F1", Est.Error = "SE_F1") %>%
mutate(
# person parameters are not scaled appropriately by mirt for the 2pl model
scale = sd(Estimate) / sd(person_pars_2pl$Estimate),
Estimate = Estimate / scale,
Est.Error = Est.Error / scale,
Q2.5 = Estimate - 1.96 * Est.Error,
Q97.5 = Estimate + 1.96 * Est.Error,
person = seq_len(n())
) %>%
select(-scale)
saveRDS(person_pars_2pl_map, "results/person_pars_2pl_map")
# Keep slope (".a1") and easiness (".d") parameters; `item - 0.2` offsets
# the joint plot.
item_pars_2pl_map <- coef(fit_2pl_map, as.data.frame = TRUE) %>%
as.data.frame() %>%
rownames_to_column() %>%
filter(str_detect(rowname, "\\.(a1|d)$")) %>%
mutate(
item = str_match(rowname, "^X[[:digit:]]+") %>%
str_remove("^X") %>%
as.numeric(),
item = item - 0.2,
nlpar = str_match(rowname, "\\.[^\\.]+$") %>%
str_remove("^\\.") %>%
factor(
levels = c("d", "a1"),
labels = c("Location", "Slope")
),
method = "MAP"
) %>%
rename(Estimate = par, Q2.5 = CI_2.5, Q97.5 = CI_97.5)
saveRDS(item_pars_2pl_map, "results/item_pars_2pl_map")
# ML ----
# Maximum-likelihood estimation with mirt (no priors on item parameters).
fit_2pl_ml <- mirt(spm_wide, model = 1, itemtype = "2PL", SE = TRUE,
technical = list(NCYCLES = 10000))
person_pars_2pl_ml <- fscores(fit_2pl_ml, full.scores.SE = TRUE) %>%
as.data.frame() %>%
rename(Estimate = "F1", Est.Error = "SE_F1") %>%
mutate(
# person parameters are not scaled appropriately by mirt for the 2pl model
scale = sd(Estimate) / sd(person_pars_2pl$Estimate),
Estimate = Estimate / scale,
Est.Error = Est.Error / scale,
Q2.5 = Estimate - 1.96 * Est.Error,
Q97.5 = Estimate + 1.96 * Est.Error,
person = seq_len(n())
) %>%
select(-scale)
saveRDS(person_pars_2pl_ml, "results/person_pars_2pl_ml")
# Slopes and easiness parameters; `item - 0.3` offsets the joint plot.
item_pars_2pl_ml <- coef(fit_2pl_ml, as.data.frame = TRUE) %>%
as.data.frame() %>%
rownames_to_column() %>%
filter(str_detect(rowname, "\\.(a1|d)$")) %>%
mutate(
item = str_match(rowname, "^X[[:digit:]]+") %>%
str_remove("^X") %>%
as.numeric(),
item = item - 0.3,
nlpar = str_match(rowname, "\\.[^\\.]+$") %>%
str_remove("^\\.") %>%
factor(
levels = c("d", "a1"),
labels = c("Location", "Slope")
),
method = "ML"
) %>%
rename(Estimate = par, Q2.5 = CI_2.5, Q97.5 = CI_97.5)
saveRDS(item_pars_2pl_ml, "results/item_pars_2pl_ml")
# Joint plots: Person parameters ----
# plot comparison of person parameters
# NOTE(review): relies on bind_cols() repairing duplicate names to
# Estimate1, Q2.51, Q97.51 — verify against the installed dplyr version.
person_pars_2pl_all <- person_pars_2pl %>%
bind_cols(person_pars_2pl_ml) %>%
mutate(
UIW = Q97.5 - Q2.5,
UIW1 = Q97.51 - Q2.51
)
person_pars_2pl_all %>%
ggplot(aes(Estimate, Estimate1, color = UIW)) +
geom_point() +
geom_abline() +
scale_color_viridis_c() +
lims(x = c(-3, 2), y = c(-3, 2)) +
labs(
x = "Estimate (MCMC-H)",
y = "Estimate (ML)",
color = "UIW (MCMC-H)"
)

# Interval-width comparison; axis limits clip extreme points (see warning).
person_pars_2pl_all %>%
ggplot(aes(UIW, UIW1, color = Estimate)) +
geom_point() +
geom_abline() +
scale_color_viridis_c() +
lims(x = c(1, 2.4), y = c(1, 2.4)) +
labs(
x = "UIW (MCMC-H)",
y = "UIW (ML)",
color = "Estimate (MCMC-H)"
)
## Warning: Removed 52 rows containing missing values (geom_point).

# Joint plots: Item parameters ----
# Item locations and slopes of all four estimation methods, faceted by
# parameter type.
item_pars_2pl %>%
bind_rows(
item_pars_2pl_nh,
item_pars_2pl_map,
item_pars_2pl_ml
) %>%
mutate(
method = factor(method, levels = c("MCMC-H", "MCMC-NH", "MAP", "ML"))
) %>%
ggplot(aes(item, Estimate, ymin = Q2.5, ymax = Q97.5, color = method)) +
scale_x_continuous(breaks = 1:12) +
geom_pointrange() +
facet_wrap("nlpar", scales = "free_x") +
scale_color_viridis_d(name = "Method") +
coord_flip() +
labs(x = "Item Number")

# 3PL models with fixed guessing probability ----
# MCMC with hierarchical item priors ----
# 3PL model with the guessing probability fixed at 1/8 (8 response options).
# The non-linear formula already returns a probability via inv_logit(), so
# the family must use link = "identity"; with link = "logit" the predictor
# would be transformed twice. The fitted model's summary output below
# confirms "Links: mu = identity".
formula_3pl_fixed <- bf(
  response2 ~ 0.125 + 0.875 * inv_logit(beta + exp(logalpha) * theta),
  nl = TRUE,
  theta ~ 0 + (1 | person),
  beta ~ 1 + (1 |i| item),
  logalpha ~ 1 + (1 |i| item),
  family = brmsfamily("bernoulli", link = "identity")
)
# Priors mirror the 2PL model; theta's SD prior identifies the scale.
prior_3pl_fixed <-
prior("normal(0, 2)", class = "b", nlpar = "beta") +
prior("normal(0, 1)", class = "b", nlpar = "logalpha") +
prior("normal(0, 1)", class = "sd", group = "person", nlpar = "theta") +
prior("normal(0, 3)", class = "sd", group = "item", nlpar = "beta") +
prior("normal(0, 1)", class = "sd", group = "item", nlpar = "logalpha")
fit_3pl_fixed <- brm(
formula = formula_3pl_fixed,
data = spm_long,
prior = prior_3pl_fixed,
chains = cores, iter = iter, warmup = warmup,
cores = cores, control = control, inits = 0,
file = "models/fit_3pl_fixed"
)
fit_3pl_fixed <- add_loo(fit_3pl_fixed)
## Warning: Found 32 observations with a pareto_k > 0.7 in model 'fit_3pl_fixed'. With this many problematic observations, it may be more appropriate to use
## 'kfold' with argument 'K = 10' to perform 10-fold cross-validation rather than LOO.
summary(fit_3pl_fixed)
## Family: bernoulli
## Links: mu = identity
## Formula: response2 ~ 0.125 + 0.875 * inv_logit(beta + exp(logalpha) * theta)
## theta ~ 0 + (1 | person)
## beta ~ 1 + (1 | i | item)
## logalpha ~ 1 + (1 | i | item)
## Data: spm_long (Number of observations: 5988)
## Samples: 4 chains, each with iter = 3000; warmup = 1000; thin = 1;
## total post-warmup samples = 8000
##
## Group-Level Effects:
## ~person (Number of levels: 499)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(theta_Intercept) 1.20 0.57 0.33 2.51 1.00 8172 4140
##
## ~item (Number of levels: 12)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(beta_Intercept) 2.65 0.63 1.68 4.13 1.00 2663 3870
## sd(logalpha_Intercept) 0.52 0.15 0.30 0.88 1.00 2917 4396
## cor(beta_Intercept,logalpha_Intercept) -0.08 0.31 -0.65 0.52 1.00 3451 4403
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## beta_Intercept 0.72 0.74 -0.80 2.14 1.00 1744 2750
## logalpha_Intercept 0.78 0.53 -0.13 1.96 1.00 7125 4303
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(fit_3pl_fixed)


# Person parameters (theta), standardized per draw (same as 1PL/2PL).
person_pars_3pl_fixed <-
ranef(fit_3pl_fixed, summary = FALSE)$person[, , "theta_Intercept"]
person_sds_3pl_fixed <- apply(person_pars_3pl_fixed, 1, sd)
person_pars_3pl_fixed <- person_pars_3pl_fixed %>%
sweep(1, person_sds_3pl_fixed, "/") %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column(var = "person") %>%
mutate(person = as.numeric(person))
saveRDS(person_pars_3pl_fixed, "results/person_pars_3pl_fixed")
person_pars_3pl_fixed %>%
arrange(Estimate) %>%
mutate(id2 = seq_len(n())) %>%
ggplot(aes(id2, Estimate, ymin = Q2.5, ymax = Q97.5)) +
geom_pointrange(alpha = 0.7) +
coord_flip() +
labs(x = "Person Number (sorted after Estimate)") +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
)

# Person-fit statistics for the fixed-guessing 3PL model.
person_fit_3pl_fixed <- fit_statistic(
fit_3pl_fixed, criterion = ll, group = person,
nsamples = 1000
)
person_diff_3pl_fixed <- person_fit_3pl_fixed %>%
group_by(person) %>%
summarise(bp = mean(crit_diff > 0))
person_max_3pl_fixed <- which.max(person_diff_3pl_fixed$bp)
person_fit_3pl_fixed %>%
filter(person == person_max_3pl_fixed) %>%
ggplot(aes(crit_diff)) +
geom_histogram() +
theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Item parameters: easiness (beta) and discrimination (exp(logalpha),
# rescaled by the per-draw person SD), plus item-fit histograms.
item_pars_3pl_fixed <- coef(fit_3pl_fixed, summary = FALSE)$item
# locations
beta <- item_pars_3pl_fixed[, , "beta_Intercept"] %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column()
# slopes
alpha <- item_pars_3pl_fixed[, , "logalpha_Intercept"] %>%
exp() %>%
sweep(1, person_sds_3pl_fixed, "*") %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column()
item_pars_3pl_fixed <- bind_rows(beta, alpha, .id = "nlpar") %>%
rename(item = "rowname") %>%
mutate(item = as.numeric(item)) %>%
mutate(
nlpar = factor(nlpar, labels = c("Location", "Slope")),
method = "MCMC-H"
)
saveRDS(item_pars_3pl_fixed, "results/item_pars_3pl_fixed")
item_fit_3pl_fixed <- fit_statistic(
fit_3pl_fixed, criterion = ll, group = item,
nsamples = 1000
)
item_fit_3pl_fixed %>%
ggplot(aes(crit_diff)) +
geom_histogram() +
facet_wrap("item", scales = "free") +
theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# MCMC with non-hierarchical item priors ----
# Non-hierarchical 3PL with fixed guessing. As for formula_3pl_fixed, the
# formula returns a probability, so link = "identity" is required.
# NOTE(review): the original transcript used link = "logit" here (its
# summary shows "Links: mu = logit"), i.e. the probability was transformed
# a second time — consistent with the extreme item estimates and the 993
# high-pareto_k observations reported for this fit. Refit after this fix.
formula_3pl_fixed_nh <- bf(
  response2 ~ 0.125 + 0.875 * inv_logit(beta + exp(logalpha) * theta),
  nl = TRUE,
  theta ~ 0 + (1 | person),
  beta ~ 0 + item,
  logalpha ~ 0 + item,
  family = brmsfamily("bernoulli", link = "identity")
)
prior_3pl_fixed_nh <-
prior("normal(0, 5)", class = "b", nlpar = "beta") +
prior("normal(0, 1)", class = "b", nlpar = "logalpha") +
prior("normal(0, 1)", class = "sd", group = "person", nlpar = "theta")
fit_3pl_fixed_nh <- brm(
formula = formula_3pl_fixed_nh,
data = spm_long,
prior = prior_3pl_fixed_nh,
chains = cores, iter = iter, warmup = warmup,
cores = cores, control = control, inits = 0,
file = "models/fit_3pl_fixed_nh"
)
# Attach the LOO criterion (add_loo() is a helper defined upstream).
# NOTE(review): 993/5988 problematic pareto_k values indicate severe
# misfit of this model as run -- worth re-checking the specification.
fit_3pl_fixed_nh <- add_loo(fit_3pl_fixed_nh)
## Warning: Found 993 observations with a pareto_k > 0.7 in model 'fit_3pl_fixed_nh'. With this many problematic observations, it may be more appropriate to use
## 'kfold' with argument 'K = 10' to perform 10-fold cross-validation rather than LOO.
## Automatically saving the model object in 'models/fit_3pl_fixed_nh.rds'
summary(fit_3pl_fixed_nh)
## Family: bernoulli
## Links: mu = logit
## Formula: response2 ~ 0.125 + 0.875 * inv_logit(beta + exp(logalpha) * theta)
## theta ~ 0 + (1 | person)
## beta ~ 0 + item
## logalpha ~ 0 + item
## Data: spm_long (Number of observations: 5988)
## Samples: 4 chains, each with iter = 3000; warmup = 1000; thin = 1;
## total post-warmup samples = 8000
##
## Group-Level Effects:
## ~person (Number of levels: 499)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(theta_Intercept) 2.46 0.57 1.39 3.65 1.00 2907 2459
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## beta_item1 6.04 2.44 2.46 11.81 1.00 6489 4681
## beta_item2 7.73 2.54 3.99 13.70 1.00 7508 4719
## beta_item3 6.70 2.48 3.02 12.48 1.00 6154 4913
## beta_item4 6.97 2.40 3.25 12.56 1.00 6575 4823
## beta_item5 7.30 2.46 3.52 13.00 1.00 6944 4741
## beta_item6 6.17 2.38 2.61 11.60 1.00 5040 4405
## beta_item7 4.11 2.29 0.86 9.57 1.00 4472 4132
## beta_item8 -1.06 2.24 -6.07 3.41 1.00 3742 3909
## beta_item9 -1.77 2.16 -6.70 2.11 1.00 3869 3536
## beta_item10 -7.34 2.62 -13.53 -3.51 1.00 6462 4589
## beta_item11 -7.83 2.61 -13.95 -3.91 1.00 7360 4863
## beta_item12 -8.04 2.56 -13.91 -4.17 1.00 7325 5096
## logalpha_item1 0.61 0.58 -0.70 1.61 1.00 3701 2706
## logalpha_item2 -0.32 0.79 -2.03 1.00 1.00 5261 5842
## logalpha_item3 0.73 0.53 -0.41 1.65 1.00 2803 2444
## logalpha_item4 0.78 0.51 -0.32 1.65 1.00 1213 675
## logalpha_item5 0.71 0.55 -0.54 1.61 1.00 1302 870
## logalpha_item6 0.94 0.48 -0.01 1.83 1.00 3399 3187
## logalpha_item7 1.07 0.55 0.03 2.16 1.00 5330 5280
## logalpha_item8 1.40 0.71 0.11 2.81 1.00 5486 5156
## logalpha_item9 1.32 0.69 0.02 2.74 1.00 4992 6304
## logalpha_item10 -0.54 0.77 -2.17 0.82 1.00 5955 5751
## logalpha_item11 -0.64 0.74 -2.23 0.65 1.00 6769 6033
## logalpha_item12 -0.68 0.72 -2.23 0.58 1.00 7476 5301
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Posterior draws of person parameters (draws x persons); dividing each
# draw by that draw's SD across persons standardizes theta to SD = 1,
# making estimates comparable across models and methods.
person_pars_3pl_fixed_nh <-
ranef(fit_3pl_fixed_nh, summary = FALSE)$person[, , "theta_Intercept"]
# per-draw (row-wise) SD of the person parameters
person_sds_3pl_fixed_nh <- apply(person_pars_3pl_fixed_nh, 1, sd)
person_pars_3pl_fixed_nh <- person_pars_3pl_fixed_nh %>%
sweep(1, person_sds_3pl_fixed_nh, "/") %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column(var = "person") %>%
mutate(person = as.numeric(person))
saveRDS(person_pars_3pl_fixed_nh, "results/person_pars_3pl_fixed_nh")
# caterpillar plot: persons ordered by point estimate, 95% CIs as bars
person_pars_3pl_fixed_nh %>%
arrange(Estimate) %>%
mutate(id2 = seq_len(n())) %>%
ggplot(aes(id2, Estimate, ymin = Q2.5, ymax = Q97.5)) +
geom_pointrange(alpha = 0.7) +
coord_flip() +
labs(x = "Person Number (sorted after Estimate)") +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
)

# Posterior-predictive person fit for the non-hierarchical fixed-guessing
# model, based on 1000 draws of the log-likelihood criterion.
person_fit_3pl_fixed_nh <- fit_statistic(
  fit_3pl_fixed_nh, criterion = ll, group = person,
  nsamples = 1000
)
person_diff_3pl_fixed_nh <- person_fit_3pl_fixed_nh %>%
  group_by(person) %>%
  summarise(bp = mean(crit_diff > 0))
# Fix: map the row index returned by which.max() back to the person ID
# so filter(person == ...) selects the intended person even when IDs
# are not contiguous 1..n.
person_max_3pl_fixed_nh <-
  person_diff_3pl_fixed_nh$person[which.max(person_diff_3pl_fixed_nh$bp)]
# Histogram of criterion differences for the worst-fitting person.
person_fit_3pl_fixed_nh %>%
  filter(person == person_max_3pl_fixed_nh) %>%
  ggplot(aes(crit_diff)) +
  geom_histogram() +
  theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# extract item parameters
# (fixed effects here, since the NH model has no item random effects)
item_pars_3pl_fixed_nh <- brms::fixef(fit_3pl_fixed_nh, summary = FALSE)
# plot item parameters
# locations
sel_beta <- grepl("^beta", colnames(item_pars_3pl_fixed_nh))
beta <- item_pars_3pl_fixed_nh[, sel_beta] %>%
posterior_summary() %>%
as_tibble() %>%
# NOTE(review): assumes rows come out in item order 1..12 so the
# generated row numbers can serve as item indices below -- verify
rownames_to_column()
# slopes
sel_alpha <- grepl("^logalpha", colnames(item_pars_3pl_fixed_nh))
alpha <- item_pars_3pl_fixed_nh[, sel_alpha] %>%
exp() %>%
# rescale slopes by the per-draw person SD (standardized theta scale)
sweep(1, person_sds_3pl_fixed_nh, "*") %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column()
item_pars_3pl_fixed_nh <- bind_rows(beta, alpha, .id = "nlpar") %>%
rename(item = "rowname") %>%
mutate(
item = as.numeric(item),
# shift x-position slightly so methods do not overlap in joint plots
item = item - 0.1,
nlpar = factor(nlpar, labels = c("Location", "Slope")),
method = "MCMC-NH"
)
saveRDS(item_pars_3pl_fixed_nh, "results/item_pars_3pl_fixed_nh")
# Item-level posterior-predictive fit for the non-hierarchical
# fixed-guessing model: one histogram of criterion differences per item.
item_fit_3pl_fixed_nh <- fit_statistic(
  fit_3pl_fixed_nh,
  criterion = ll,
  group = item,
  nsamples = 1000
)
item_fit_3pl_fixed_nh %>%
  ggplot(aes(x = crit_diff)) +
  geom_histogram() +
  facet_wrap(~item, scales = "free") +
  theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

MAP
# MAP estimation via mirt: priors matching the brms NH model.
# NOTE(review): the index sequences assume mirt lays out 4 parameters
# per item (a1, d, g, u), so positions 1,5,9,... are slopes and
# 2,6,10,... are intercepts -- verify against mirt's parameter table.
parprior_3pl_fixed_map <- list(
c(seq(1, 45, 4), "lnorm", 0, 2),
c(seq(2, 46, 4), "norm", 0, 5)
)
# NOTE(review): guess = 0.11 here vs. a fixed 0.125 in the brms
# formulas -- confirm whether this difference is intended.
fit_3pl_fixed_map <- mirt(
spm_wide, model = 1,
itemtype = "2PL", guess = 0.11,
SE = TRUE, technical = list(NCYCLES = 10000),
parprior = parprior_3pl_fixed_map
)
person_pars_3pl_fixed_map <- fscores(fit_3pl_fixed_map, full.scores.SE = TRUE) %>%
as.data.frame() %>%
rename(Estimate = "F1", Est.Error = "SE_F1") %>%
mutate(
# person parameters are not scaled appropriately by mirt for the 3pl_fixed model
# -> rescale so the SD matches the MCMC-H person-parameter SD
scale = sd(Estimate) / sd(person_pars_3pl_fixed$Estimate),
Estimate = Estimate / scale,
Est.Error = Est.Error / scale,
Q2.5 = Estimate - 1.96 * Est.Error,
Q97.5 = Estimate + 1.96 * Est.Error,
person = seq_len(n())
) %>%
select(-scale)
saveRDS(person_pars_3pl_fixed_map, "results/person_pars_3pl_fixed_map")
# keep only slope (a1) and intercept (d) rows; parse item number and
# parameter name out of mirt's "X<item>.<par>" row names
item_pars_3pl_fixed_map <- coef(fit_3pl_fixed_map, as.data.frame = TRUE) %>%
as.data.frame() %>%
rownames_to_column() %>%
filter(str_detect(rowname, "\\.(a1|d)$")) %>%
mutate(
item = str_match(rowname, "^X[[:digit:]]+") %>%
str_remove("^X") %>%
as.numeric(),
# shift x-position slightly so methods do not overlap in joint plots
item = item - 0.2,
nlpar = str_match(rowname, "\\.[^\\.]+$") %>%
str_remove("^\\.") %>%
factor(
levels = c("d", "a1"),
labels = c("Location", "Slope")
),
method = "MAP"
) %>%
rename(Estimate = par, Q2.5 = CI_2.5, Q97.5 = CI_97.5)
saveRDS(item_pars_3pl_fixed_map, "results/item_pars_3pl_fixed_map")
ML
# Maximum-likelihood estimation via mirt (no parameter priors).
# NOTE(review): guess = 0.11 here vs. a fixed 0.125 in the brms
# formulas -- confirm whether this difference is intended.
fit_3pl_fixed_ml <- mirt(
spm_wide, model = 1, itemtype = "2PL", guess = 0.11,
SE = TRUE, technical = list(NCYCLES = 10000))
person_pars_3pl_fixed_ml <- fscores(fit_3pl_fixed_ml, full.scores.SE = TRUE) %>%
as.data.frame() %>%
rename(Estimate = "F1", Est.Error = "SE_F1") %>%
mutate(
# person parameters are not scaled appropriately by mirt for the 3pl_fixed model
# -> rescale so the SD matches the MCMC-H person-parameter SD
scale = sd(Estimate) / sd(person_pars_3pl_fixed$Estimate),
Estimate = Estimate / scale,
Est.Error = Est.Error / scale,
Q2.5 = Estimate - 1.96 * Est.Error,
Q97.5 = Estimate + 1.96 * Est.Error,
person = seq_len(n())
) %>%
select(-scale)
saveRDS(person_pars_3pl_fixed_ml, "results/person_pars_3pl_fixed_ml")
# keep only slope (a1) and intercept (d) rows; parse item number and
# parameter name out of mirt's "X<item>.<par>" row names
item_pars_3pl_fixed_ml <- coef(fit_3pl_fixed_ml, as.data.frame = TRUE) %>%
as.data.frame() %>%
rownames_to_column() %>%
filter(str_detect(rowname, "\\.(a1|d)$")) %>%
mutate(
item = str_match(rowname, "^X[[:digit:]]+") %>%
str_remove("^X") %>%
as.numeric(),
# shift x-position slightly so methods do not overlap in joint plots
item = item - 0.3,
nlpar = str_match(rowname, "\\.[^\\.]+$") %>%
str_remove("^\\.") %>%
factor(
levels = c("d", "a1"),
labels = c("Location", "Slope")
),
method = "ML"
) %>%
rename(Estimate = par, Q2.5 = CI_2.5, Q97.5 = CI_97.5)
saveRDS(item_pars_3pl_fixed_ml, "results/item_pars_3pl_fixed_ml")
Joint plots: Person parameters
# plot comparison of person parameters
# NOTE(review): relies on bind_cols() repairing duplicate column names
# by appending "1" (Estimate1, Q2.51, Q97.51); this repair scheme is
# dplyr-version dependent -- verify with the installed version.
person_pars_3pl_fixed_all <- person_pars_3pl_fixed %>%
bind_cols(person_pars_3pl_fixed_ml) %>%
mutate(
UIW = Q97.5 - Q2.5,
UIW1 = Q97.51 - Q2.51
)
# MCMC-H vs ML point estimates, coloured by the MCMC-H uncertainty
# interval width; the identity line marks perfect agreement
person_pars_3pl_fixed_all %>%
ggplot(aes(Estimate, Estimate1, color = UIW)) +
geom_point() +
geom_abline() +
scale_color_viridis_c() +
lims(x = c(-3, 2), y = c(-3, 2)) +
labs(
x = "Estimate (MCMC-H)",
y = "Estimate (ML)",
color = "UIW (MCMC-H)"
)

# Uncertainty-interval widths (UIW) of MCMC-H vs ML person estimates,
# coloured by the MCMC-H point estimate; identity line marks equality.
person_pars_3pl_fixed_all %>%
  ggplot(aes(x = UIW, y = UIW1, color = Estimate)) +
  geom_point() +
  geom_abline() +
  scale_color_viridis_c() +
  lims(x = c(1, 2.4), y = c(1, 2.4)) +
  labs(
    x = "UIW (MCMC-H)",
    y = "UIW (ML)",
    color = "Estimate (MCMC-H)"
  )

Joint plots: Item parameters
# Joint plot of item parameters (Location, Slope) across the four
# estimation methods; item positions were dodged slightly upstream.
bind_rows(
  item_pars_3pl_fixed,
  item_pars_3pl_fixed_nh,
  item_pars_3pl_fixed_map,
  item_pars_3pl_fixed_ml
) %>%
  mutate(
    method = factor(method, levels = c("MCMC-H", "MCMC-NH", "MAP", "ML"))
  ) %>%
  ggplot(aes(x = item, y = Estimate, ymin = Q2.5, ymax = Q97.5, color = method)) +
  geom_pointrange() +
  scale_x_continuous(breaks = 1:12) +
  scale_color_viridis_d(name = "Method") +
  facet_wrap(~nlpar, scales = "free_x") +
  coord_flip() +
  labs(x = "Item Number")

3PL Models
MCMC with hierarchical item priors
# Hierarchical 3PL: guessing parameter gamma estimated per item on the
# logit scale; identity link because the formula already produces a
# probability via inv_logit(). The (1 |i| item) terms share one set of
# correlated item random effects across the three non-linear parameters.
formula_3pl <- bf(
response2 ~ gamma + (1 - gamma) *
inv_logit(beta + exp(logalpha) * theta),
nl = TRUE,
theta ~ 0 + (1 | person),
beta ~ 1 + (1 |i| item),
logalpha ~ 1 + (1 |i| item),
logitgamma ~ 1 + (1 |i| item),
nlf(gamma ~ inv_logit(logitgamma)),
family = brmsfamily("bernoulli", link = "identity")
)
# weakly informative priors; logitgamma centered at -2 (guessing ~ 0.12)
prior_3pl <-
prior("normal(0, 2)", class = "b", nlpar = "beta") +
prior("normal(0, 1)", class = "b", nlpar = "logalpha") +
prior("normal(-2, 0.5)", class = "b", nlpar = "logitgamma") +
prior("normal(0, 1)", class = "sd", group = "person", nlpar = "theta") +
prior("normal(0, 3)", class = "sd", group = "item", nlpar = "beta") +
prior("normal(0, 1)", class = "sd", group = "item", nlpar = "logalpha") +
prior("normal(0, 1)", class = "sd", group = "item", nlpar = "logitgamma")
# brm() reloads a cached fit from 'file' if it exists
fit_3pl <- brm(
formula = formula_3pl,
data = spm_long,
prior = prior_3pl,
chains = cores, iter = iter, warmup = warmup,
cores = cores, control = control, inits = 0,
file = "models/fit_3pl"
)
# Attach the LOO criterion (add_loo() is a helper defined upstream).
fit_3pl <- add_loo(fit_3pl)
## Warning: Found 16 observations with a pareto_k > 0.7 in model 'fit_3pl'. With this many problematic observations, it may be more appropriate to use 'kfold' with
## argument 'K = 10' to perform 10-fold cross-validation rather than LOO.
summary(fit_3pl)
## Family: bernoulli
## Links: mu = identity
## Formula: response2 ~ gamma + (1 - gamma) * inv_logit(beta + exp(logalpha) * theta)
## theta ~ 0 + (1 | person)
## beta ~ 1 + (1 | i | item)
## logalpha ~ 1 + (1 | i | item)
## logitgamma ~ 1 + (1 | i | item)
## gamma ~ inv_logit(logitgamma)
## Data: spm_long (Number of observations: 5988)
## Samples: 4 chains, each with iter = 3000; warmup = 1000; thin = 1;
## total post-warmup samples = 8000
##
## Group-Level Effects:
## ~person (Number of levels: 499)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(theta_Intercept) 1.19 0.57 0.32 2.51 1.00 11359 4714
##
## ~item (Number of levels: 12)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(beta_Intercept) 2.58 0.61 1.67 4.05 1.00 2898 5278
## sd(logalpha_Intercept) 0.54 0.15 0.31 0.91 1.00 3731 5032
## sd(logitgamma_Intercept) 1.01 0.49 0.22 2.12 1.00 3049 3145
## cor(beta_Intercept,logalpha_Intercept) -0.05 0.30 -0.61 0.53 1.00 4226 5303
## cor(beta_Intercept,logitgamma_Intercept) -0.47 0.31 -0.92 0.24 1.00 6611 5405
## cor(logalpha_Intercept,logitgamma_Intercept) -0.02 0.38 -0.72 0.72 1.00 6409 6410
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## beta_Intercept 0.58 0.76 -0.99 2.01 1.00 1837 2958
## logalpha_Intercept 0.74 0.54 -0.18 1.95 1.00 9883 4864
## logitgamma_Intercept -2.67 0.34 -3.37 -2.03 1.00 10401 5930
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# MCMC trace and density plots for convergence diagnostics
plot(fit_3pl)


# Posterior draws of person parameters (draws x persons); dividing each
# draw by that draw's SD across persons standardizes theta to SD = 1.
person_pars_3pl <-
ranef(fit_3pl, summary = FALSE)$person[, , "theta_Intercept"]
# per-draw (row-wise) SD of the person parameters
person_sds_3pl <- apply(person_pars_3pl, 1, sd)
person_pars_3pl <- person_pars_3pl %>%
sweep(1, person_sds_3pl, "/") %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column(var = "person") %>%
mutate(person = as.numeric(person))
saveRDS(person_pars_3pl, "results/person_pars_3pl")
# caterpillar plot: persons ordered by point estimate, 95% CIs as bars
person_pars_3pl %>%
arrange(Estimate) %>%
mutate(id2 = seq_len(n())) %>%
ggplot(aes(id2, Estimate, ymin = Q2.5, ymax = Q97.5)) +
geom_pointrange(alpha = 0.7) +
coord_flip() +
labs(x = "Person Number (sorted after Estimate)") +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
)

# Posterior-predictive person fit for the hierarchical 3PL model,
# based on 1000 draws of the log-likelihood criterion.
person_fit_3pl <- fit_statistic(
  fit_3pl, criterion = ll, group = person,
  nsamples = 1000
)
person_diff_3pl <- person_fit_3pl %>%
  group_by(person) %>%
  summarise(bp = mean(crit_diff > 0))
# Fix: map the row index returned by which.max() back to the person ID
# so filter(person == ...) selects the intended person even when IDs
# are not contiguous 1..n.
person_max_3pl <-
  person_diff_3pl$person[which.max(person_diff_3pl$bp)]
# Histogram of criterion differences for the worst-fitting person.
person_fit_3pl %>%
  filter(person == person_max_3pl) %>%
  ggplot(aes(crit_diff)) +
  geom_histogram() +
  theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# extract item parameters
# posterior draws (draws x item x parameter) from the hierarchical fit
item_pars_3pl <- coef(fit_3pl, summary = FALSE)$item
# plot item parameters
# locations
beta <- item_pars_3pl[, , "beta_Intercept"] %>%
posterior_summary() %>%
as_tibble() %>%
# NOTE(review): assumes rows come out in item order 1..12 so the
# generated row numbers can serve as item indices below -- verify
rownames_to_column()
# slopes
alpha <- item_pars_3pl[, , "logalpha_Intercept"] %>%
exp() %>%
# rescale slopes by the per-draw person SD (standardized theta scale)
sweep(1, person_sds_3pl, "*") %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column()
# guessing parameters
# back-transform from the logit scale to probabilities
gamma <- item_pars_3pl[, , "logitgamma_Intercept"] %>%
inv_logit_scaled() %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column()
item_pars_3pl <- bind_rows(beta, alpha, gamma, .id = "nlpar") %>%
rename(item = "rowname") %>%
mutate(item = as.numeric(item)) %>%
mutate(
nlpar = factor(nlpar, labels = c("Location", "Slope", "Guessing")),
method = "MCMC-H"
)
saveRDS(item_pars_3pl, "results/item_pars_3pl")
# item-level posterior-predictive fit, one histogram per item
item_fit_3pl <- fit_statistic(
fit_3pl, criterion = ll, group = item,
nsamples = 1000
)
item_fit_3pl %>%
ggplot(aes(crit_diff)) +
geom_histogram() +
facet_wrap("item", scales = "free") +
theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

MCMC with non-hierarchical item priors
# Non-hierarchical 3PL: all item parameters (location, slope, guessing)
# as unpooled fixed effects; identity link because the formula already
# produces a probability via inv_logit().
formula_3pl_nh <- bf(
response2 ~ gamma + (1 - gamma) *
inv_logit(beta + exp(logalpha) * theta),
nl = TRUE,
theta ~ 0 + (1 | person),
beta ~ 0 + item,
logalpha ~ 0 + item,
logitgamma ~ 0 + item,
nlf(gamma ~ inv_logit(logitgamma)),
family = brmsfamily("bernoulli", link = "identity")
)
# wider priors than the hierarchical model, since no pooling occurs
prior_3pl_nh <-
prior("normal(0, 5)", class = "b", nlpar = "beta") +
prior("normal(0, 1)", class = "b", nlpar = "logalpha") +
prior("normal(-2, 1)", class = "b", nlpar = "logitgamma") +
prior("normal(0, 1)", class = "sd", group = "person", nlpar = "theta")
# brm() reloads a cached fit from 'file' if it exists
fit_3pl_nh <- brm(
formula = formula_3pl_nh,
data = spm_long,
prior = prior_3pl_nh,
chains = cores, iter = iter, warmup = warmup,
cores = cores, control = control, inits = 0,
file = "models/fit_3pl_nh"
)
# Attach the LOO criterion (add_loo() is a helper defined upstream).
fit_3pl_nh <- add_loo(fit_3pl_nh)
## Warning: Found 140 observations with a pareto_k > 0.7 in model 'fit_3pl_nh'. With this many problematic observations, it may be more appropriate to use 'kfold'
## with argument 'K = 10' to perform 10-fold cross-validation rather than LOO.
## Automatically saving the model object in 'models/fit_3pl_nh.rds'
summary(fit_3pl_nh)
## Family: bernoulli
## Links: mu = identity
## Formula: response2 ~ gamma + (1 - gamma) * inv_logit(beta + exp(logalpha) * theta)
## theta ~ 0 + (1 | person)
## beta ~ 0 + item
## logalpha ~ 0 + item
## logitgamma ~ 0 + item
## gamma ~ inv_logit(logitgamma)
## Data: spm_long (Number of observations: 5988)
## Samples: 4 chains, each with iter = 3000; warmup = 1000; thin = 1;
## total post-warmup samples = 8000
##
## Group-Level Effects:
## ~person (Number of levels: 499)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(theta_Intercept) 1.51 0.52 0.67 2.71 1.00 1309 1868
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## beta_item1 1.21 0.21 0.64 1.53 1.00 4846 3628
## beta_item2 3.45 0.40 2.69 4.29 1.00 6865 5592
## beta_item3 1.94 0.22 1.51 2.38 1.00 5251 5523
## beta_item4 3.77 0.59 2.82 5.11 1.00 3631 4552
## beta_item5 5.46 1.20 3.79 8.43 1.00 3154 3340
## beta_item6 2.00 0.25 1.56 2.51 1.00 4285 5928
## beta_item7 1.10 0.20 0.65 1.45 1.00 4566 4993
## beta_item8 0.41 0.16 0.07 0.70 1.00 3669 4250
## beta_item9 -0.48 0.63 -2.05 0.41 1.00 2103 1876
## beta_item10 -0.78 0.18 -1.16 -0.46 1.00 3086 4958
## beta_item11 -4.38 1.97 -9.60 -2.02 1.00 1719 2879
## beta_item12 -3.34 1.35 -6.77 -1.67 1.00 1869 2180
## logalpha_item1 -0.51 0.40 -1.25 0.32 1.00 1421 2464
## logalpha_item2 0.35 0.40 -0.39 1.16 1.00 1494 2644
## logalpha_item3 0.15 0.38 -0.56 0.95 1.00 1334 2573
## logalpha_item4 0.97 0.39 0.26 1.78 1.00 1532 2589
## logalpha_item5 1.23 0.42 0.46 2.08 1.00 1696 2458
## logalpha_item6 0.50 0.38 -0.21 1.29 1.00 1324 2186
## logalpha_item7 0.13 0.39 -0.58 0.92 1.00 1462 2212
## logalpha_item8 0.13 0.38 -0.57 0.91 1.00 1373 2410
## logalpha_item9 0.46 0.48 -0.43 1.44 1.00 1571 3123
## logalpha_item10 0.41 0.38 -0.28 1.21 1.00 1435 2280
## logalpha_item11 1.52 0.53 0.58 2.65 1.00 1476 2635
## logalpha_item12 0.89 0.48 -0.01 1.90 1.00 1321 2263
## logitgamma_item1 -3.85 2.09 -8.66 -0.64 1.00 4530 3795
## logitgamma_item2 -3.51 2.18 -8.50 -0.15 1.00 6773 5403
## logitgamma_item3 -4.09 1.95 -8.48 -1.11 1.00 7759 5664
## logitgamma_item4 -5.07 1.76 -9.02 -2.34 1.00 10630 5667
## logitgamma_item5 -5.04 1.81 -9.19 -2.26 1.00 7851 4759
## logitgamma_item6 -4.56 1.81 -8.83 -1.92 1.00 8048 5901
## logitgamma_item7 -3.88 1.97 -8.59 -1.18 1.00 5348 5123
## logitgamma_item8 -4.82 1.83 -9.07 -2.03 1.00 6709 5302
## logitgamma_item9 -1.55 1.15 -5.24 -0.53 1.00 2177 1016
## logitgamma_item10 -5.61 1.66 -9.37 -3.11 1.00 7978 5757
## logitgamma_item11 -2.08 0.24 -2.58 -1.65 1.00 6440 5920
## logitgamma_item12 -1.82 0.26 -2.41 -1.39 1.00 4491 5120
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# MCMC trace and density plots for convergence diagnostics
plot(fit_3pl_nh)








# Posterior draws of person parameters (draws x persons); dividing each
# draw by that draw's SD across persons standardizes theta to SD = 1.
person_pars_3pl_nh <-
ranef(fit_3pl_nh, summary = FALSE)$person[, , "theta_Intercept"]
# per-draw (row-wise) SD of the person parameters
person_sds_3pl_nh <- apply(person_pars_3pl_nh, 1, sd)
person_pars_3pl_nh <- person_pars_3pl_nh %>%
sweep(1, person_sds_3pl_nh, "/") %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column(var = "person") %>%
mutate(person = as.numeric(person))
saveRDS(person_pars_3pl_nh, "results/person_pars_3pl_nh")
# caterpillar plot: persons ordered by point estimate, 95% CIs as bars
person_pars_3pl_nh %>%
arrange(Estimate) %>%
mutate(id2 = seq_len(n())) %>%
ggplot(aes(id2, Estimate, ymin = Q2.5, ymax = Q97.5)) +
geom_pointrange(alpha = 0.7) +
coord_flip() +
labs(x = "Person Number (sorted after Estimate)") +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
)

# Posterior-predictive person fit for the non-hierarchical 3PL model,
# based on 1000 draws of the log-likelihood criterion.
person_fit_3pl_nh <- fit_statistic(
  fit_3pl_nh, criterion = ll, group = person,
  nsamples = 1000
)
person_diff_3pl_nh <- person_fit_3pl_nh %>%
  group_by(person) %>%
  summarise(bp = mean(crit_diff > 0))
# Fix: map the row index returned by which.max() back to the person ID
# so filter(person == ...) selects the intended person even when IDs
# are not contiguous 1..n.
person_max_3pl_nh <-
  person_diff_3pl_nh$person[which.max(person_diff_3pl_nh$bp)]
# Histogram of criterion differences for the worst-fitting person.
person_fit_3pl_nh %>%
  filter(person == person_max_3pl_nh) %>%
  ggplot(aes(crit_diff)) +
  geom_histogram() +
  theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# extract item parameters
# (fixed effects here, since the NH model has no item random effects)
item_pars_3pl_nh <- brms::fixef(fit_3pl_nh, summary = FALSE)
# plot item parameters
# locations
sel_beta <- grepl("^beta", colnames(item_pars_3pl_nh))
beta <- item_pars_3pl_nh[, sel_beta] %>%
posterior_summary() %>%
as_tibble() %>%
# NOTE(review): assumes rows come out in item order 1..12 so the
# generated row numbers can serve as item indices below -- verify
rownames_to_column()
# slopes
sel_alpha <- grepl("^logalpha", colnames(item_pars_3pl_nh))
alpha <- item_pars_3pl_nh[, sel_alpha] %>%
exp() %>%
# rescale slopes by the per-draw person SD (standardized theta scale)
sweep(1, person_sds_3pl_nh, "*") %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column()
# guessing parameters
# back-transform from the logit scale to probabilities
sel_gamma <- grepl("^logitgamma", colnames(item_pars_3pl_nh))
gamma <- item_pars_3pl_nh[, sel_gamma] %>%
inv_logit_scaled() %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column()
item_pars_3pl_nh <- bind_rows(beta, alpha, gamma, .id = "nlpar") %>%
rename(item = "rowname") %>%
mutate(
item = as.numeric(item),
# shift x-position slightly so methods do not overlap in joint plots
item = item - 0.1,
nlpar = factor(nlpar, labels = c("Location", "Slope", "Guessing")),
method = "MCMC-NH"
)
saveRDS(item_pars_3pl_nh, "results/item_pars_3pl_nh")
# item-level posterior-predictive fit, one histogram per item
item_fit_3pl_nh <- fit_statistic(
fit_3pl_nh, criterion = ll, group = item,
nsamples = 1000
)
item_fit_3pl_nh %>%
ggplot(aes(crit_diff)) +
geom_histogram() +
facet_wrap("item", scales = "free") +
theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 36 rows containing non-finite values (stat_bin).

MAP
# MAP estimation of the 3PL via mirt: priors matching the brms NH model.
# NOTE(review): the index sequences assume mirt lays out 4 parameters
# per item (a1, d, g, u), so positions 1,5,9,... are slopes, 2,6,10,...
# intercepts, and 3,7,11,... guessing -- verify against mirt's table.
parprior_3pl_map <- list(
c(seq(1, 45, 4), "lnorm", 0, 2),
c(seq(2, 46, 4), "norm", 0, 5),
c(seq(3, 47, 4), "norm", -2, 1)
)
fit_3pl_map <- mirt(
spm_wide, model = 1,
itemtype = "3PL", SE = TRUE,
technical = list(NCYCLES = 10000),
parprior = parprior_3pl_map
)
person_pars_3pl_map <- fscores(fit_3pl_map, full.scores.SE = TRUE) %>%
as.data.frame() %>%
rename(Estimate = "F1", Est.Error = "SE_F1") %>%
mutate(
# person parameters are not scaled appropriately by mirt for the 3pl model
# -> rescale so the SD matches the MCMC-H person-parameter SD
scale = sd(Estimate) / sd(person_pars_3pl$Estimate),
Estimate = Estimate / scale,
Est.Error = Est.Error / scale,
Q2.5 = Estimate - 1.96 * Est.Error,
Q97.5 = Estimate + 1.96 * Est.Error,
person = seq_len(n())
) %>%
select(-scale)
saveRDS(person_pars_3pl_map, "results/person_pars_3pl_map")
# keep slope (a1), intercept (d), and guessing (g) rows; parse item
# number and parameter name out of mirt's "X<item>.<par>" row names
item_pars_3pl_map <- coef(fit_3pl_map, as.data.frame = TRUE) %>%
as.data.frame() %>%
rownames_to_column() %>%
filter(str_detect(rowname, "\\.(a1|d|g)$")) %>%
mutate(
item = str_match(rowname, "^X[[:digit:]]+") %>%
str_remove("^X") %>%
as.numeric(),
# shift x-position slightly so methods do not overlap in joint plots
item = item - 0.2,
nlpar = str_match(rowname, "\\.[^\\.]+$") %>%
str_remove("^\\.") %>%
factor(
levels = c("d", "a1", "g"),
labels = c("Location", "Slope", "Guessing")
),
method = "MAP"
) %>%
rename(Estimate = par, Q2.5 = CI_2.5, Q97.5 = CI_97.5)
saveRDS(item_pars_3pl_map, "results/item_pars_3pl_map")
ML
# Maximum-likelihood estimation of the 3PL via mirt (no priors).
fit_3pl_ml <- mirt(spm_wide, model = 1, itemtype = "3PL", SE = TRUE,
technical = list(NCYCLES = 10000))
person_pars_3pl_ml <- fscores(fit_3pl_ml, full.scores.SE = TRUE) %>%
as.data.frame() %>%
rename(Estimate = "F1", Est.Error = "SE_F1") %>%
mutate(
# person parameters are not scaled appropriately by mirt for the 3pl model
# -> rescale so the SD matches the MCMC-H person-parameter SD
scale = sd(Estimate) / sd(person_pars_3pl$Estimate),
Estimate = Estimate / scale,
Est.Error = Est.Error / scale,
Q2.5 = Estimate - 1.96 * Est.Error,
Q97.5 = Estimate + 1.96 * Est.Error,
person = seq_len(n())
) %>%
select(-scale)
saveRDS(person_pars_3pl_ml, "results/person_pars_3pl_ml")
# keep slope (a1), intercept (d), and guessing (g) rows; parse item
# number and parameter name out of mirt's "X<item>.<par>" row names
item_pars_3pl_ml <- coef(fit_3pl_ml, as.data.frame = TRUE) %>%
as.data.frame() %>%
rownames_to_column() %>%
filter(str_detect(rowname, "\\.(a1|d|g)$")) %>%
mutate(
item = str_match(rowname, "^X[[:digit:]]+") %>%
str_remove("^X") %>%
as.numeric(),
# shift x-position slightly so methods do not overlap in joint plots
item = item - 0.3,
nlpar = str_match(rowname, "\\.[^\\.]+$") %>%
str_remove("^\\.") %>%
factor(
levels = c("d", "a1", "g"),
labels = c("Location", "Slope", "Guessing")
),
method = "ML"
) %>%
rename(Estimate = par, Q2.5 = CI_2.5, Q97.5 = CI_97.5)
saveRDS(item_pars_3pl_ml, "results/item_pars_3pl_ml")
Joint plots: Person parameters
# plot comparison of person parameters
# NOTE(review): relies on bind_cols() repairing duplicate column names
# by appending "1" (Estimate1, Q2.51, Q97.51); this repair scheme is
# dplyr-version dependent -- verify with the installed version.
person_pars_3pl_all <- person_pars_3pl %>%
bind_cols(person_pars_3pl_ml) %>%
mutate(
UIW = Q97.5 - Q2.5,
UIW1 = Q97.51 - Q2.51
)
# MCMC-H vs ML point estimates, coloured by the MCMC-H uncertainty
# interval width; the identity line marks perfect agreement
person_pars_3pl_all %>%
ggplot(aes(Estimate, Estimate1, color = UIW)) +
geom_point() +
geom_abline() +
scale_color_viridis_c() +
lims(x = c(-3, 2), y = c(-3, 2)) +
labs(
x = "Estimate (MCMC-H)",
y = "Estimate (ML)",
color = "UIW (MCMC-H)"
)

# Uncertainty-interval widths (UIW) of MCMC-H vs ML person estimates,
# coloured by the MCMC-H point estimate; identity line marks equality.
person_pars_3pl_all %>%
  ggplot(aes(x = UIW, y = UIW1, color = Estimate)) +
  geom_point() +
  geom_abline() +
  scale_color_viridis_c() +
  lims(x = c(1, 2.4), y = c(1, 2.4)) +
  labs(
    x = "UIW (MCMC-H)",
    y = "UIW (ML)",
    color = "Estimate (MCMC-H)"
  )

Joint plots: Item parameters
# Joint plot of item parameters (Location, Slope, Guessing) across the
# four estimation methods; item positions were dodged slightly upstream.
bind_rows(
  item_pars_3pl,
  item_pars_3pl_nh,
  item_pars_3pl_map,
  item_pars_3pl_ml
) %>%
  mutate(
    method = factor(method, levels = c("MCMC-H", "MCMC-NH", "MAP", "ML"))
  ) %>%
  ggplot(aes(x = item, y = Estimate, ymin = Q2.5, ymax = Q97.5, color = method)) +
  geom_pointrange() +
  scale_x_continuous(breaks = 1:12) +
  scale_color_viridis_d(name = "Method") +
  facet_wrap(~nlpar, scales = "free_x") +
  coord_flip() +
  labs(x = "Item Number")

4PL Models
MCMC with hierarchical item priors
# Hierarchical 4PL: adds a lapse parameter psi (upper asymptote 1 - psi)
# to the 3PL; identity link because the formula already produces a
# probability via inv_logit(). The (1 |i| item) terms share one set of
# correlated item random effects across all four non-linear parameters.
formula_4pl <- bf(
response2 ~ gamma + (1 - gamma - psi) *
inv_logit(beta + exp(logalpha) * theta),
nl = TRUE,
theta ~ 0 + (1 | person),
beta ~ 1 + (1 |i| item),
logalpha ~ 1 + (1 |i| item),
logitgamma ~ 1 + (1 |i| item),
nlf(gamma ~ inv_logit(logitgamma)),
logitpsi ~ 1 + (1 |i| item),
nlf(psi ~ inv_logit(logitpsi)),
family = brmsfamily("bernoulli", link = "identity")
)
# weakly informative priors; logit-scale intercepts of gamma and psi
# centered at -2 (probabilities around 0.12)
prior_4pl <-
prior("normal(0, 2)", class = "b", nlpar = "beta") +
prior("normal(0, 1)", class = "b", nlpar = "logalpha") +
prior("normal(-2, 0.5)", class = "b", nlpar = "logitgamma") +
prior("normal(-2, 0.5)", class = "b", nlpar = "logitpsi") +
prior("normal(0, 1)", class = "sd", group = "person", nlpar = "theta") +
prior("normal(0, 3)", class = "sd", group = "item", nlpar = "beta") +
prior("normal(0, 1)", class = "sd", group = "item", nlpar = "logalpha") +
prior("normal(0, 1)", class = "sd", group = "item", nlpar = "logitgamma") +
prior("normal(0, 1)", class = "sd", group = "item", nlpar = "logitpsi")
# brm() reloads a cached fit from 'file' if it exists
fit_4pl <- brm(
formula = formula_4pl,
data = spm_long,
prior = prior_4pl,
chains = cores, iter = iter, warmup = warmup,
cores = cores, control = control, inits = 0,
file = "models/fit_4pl"
)
# Attach the LOO criterion (add_loo() is a helper defined upstream).
fit_4pl <- add_loo(fit_4pl)
## Warning: Found 59 observations with a pareto_k > 0.7 in model 'fit_4pl'. With this many problematic observations, it may be more appropriate to use 'kfold' with
## argument 'K = 10' to perform 10-fold cross-validation rather than LOO.
summary(fit_4pl)
## Family: bernoulli
## Links: mu = identity
## Formula: response2 ~ gamma + (1 - gamma - psi) * inv_logit(beta + exp(logalpha) * theta)
## theta ~ 0 + (1 | person)
## beta ~ 1 + (1 | i | item)
## logalpha ~ 1 + (1 | i | item)
## logitgamma ~ 1 + (1 | i | item)
## gamma ~ inv_logit(logitgamma)
## logitpsi ~ 1 + (1 | i | item)
## psi ~ inv_logit(logitpsi)
## Data: spm_long (Number of observations: 5988)
## Samples: 4 chains, each with iter = 3000; warmup = 1000; thin = 1;
## total post-warmup samples = 8000
##
## Group-Level Effects:
## ~person (Number of levels: 499)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(theta_Intercept) 1.25 0.56 0.36 2.48 1.00 6141 4347
##
## ~item (Number of levels: 12)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(beta_Intercept) 3.13 0.78 1.96 5.03 1.00 2284 3346
## sd(logalpha_Intercept) 0.50 0.18 0.21 0.93 1.00 2086 3319
## sd(logitgamma_Intercept) 0.98 0.46 0.23 2.00 1.00 2075 2261
## sd(logitpsi_Intercept) 1.12 0.48 0.31 2.20 1.00 1983 2035
## cor(beta_Intercept,logalpha_Intercept) -0.05 0.32 -0.64 0.56 1.00 2940 4470
## cor(beta_Intercept,logitgamma_Intercept) -0.38 0.29 -0.84 0.25 1.00 3874 4397
## cor(logalpha_Intercept,logitgamma_Intercept) -0.02 0.37 -0.70 0.69 1.00 3018 4308
## cor(beta_Intercept,logitpsi_Intercept) -0.40 0.32 -0.89 0.32 1.00 2327 3738
## cor(logalpha_Intercept,logitpsi_Intercept) -0.20 0.36 -0.83 0.54 1.00 2403 4344
## cor(logitgamma_Intercept,logitpsi_Intercept) 0.23 0.39 -0.56 0.87 1.00 2802 4480
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## beta_Intercept 0.67 0.94 -1.25 2.53 1.00 1442 2364
## logalpha_Intercept 0.93 0.52 0.05 2.09 1.00 4823 3552
## logitgamma_Intercept -2.43 0.34 -3.12 -1.77 1.00 4158 5096
## logitpsi_Intercept -3.06 0.36 -3.76 -2.32 1.00 3110 4945
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# MCMC trace and density plots for convergence diagnostics
plot(fit_4pl)



# Posterior draws of person parameters (draws x persons); dividing each
# draw by that draw's SD across persons standardizes theta to SD = 1.
person_pars_4pl <-
ranef(fit_4pl, summary = FALSE)$person[, , "theta_Intercept"]
# per-draw (row-wise) SD of the person parameters
person_sds_4pl <- apply(person_pars_4pl, 1, sd)
person_pars_4pl <- person_pars_4pl %>%
sweep(1, person_sds_4pl, "/") %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column(var = "person") %>%
mutate(person = as.numeric(person))
saveRDS(person_pars_4pl, "results/person_pars_4pl")
# caterpillar plot: persons ordered by point estimate, 95% CIs as bars
person_pars_4pl %>%
arrange(Estimate) %>%
mutate(id2 = seq_len(n())) %>%
ggplot(aes(id2, Estimate, ymin = Q2.5, ymax = Q97.5)) +
geom_pointrange(alpha = 0.7) +
coord_flip() +
labs(x = "Person Number (sorted after Estimate)") +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
)

# Posterior-predictive person fit for the hierarchical 4PL model,
# based on 1000 draws of the log-likelihood criterion.
person_fit_4pl <- fit_statistic(
  fit_4pl, criterion = ll, group = person,
  nsamples = 1000
)
person_diff_4pl <- person_fit_4pl %>%
  group_by(person) %>%
  summarise(bp = mean(crit_diff > 0))
# Fix: map the row index returned by which.max() back to the person ID
# so filter(person == ...) selects the intended person even when IDs
# are not contiguous 1..n.
person_max_4pl <-
  person_diff_4pl$person[which.max(person_diff_4pl$bp)]
# Histogram of criterion differences for the worst-fitting person.
person_fit_4pl %>%
  filter(person == person_max_4pl) %>%
  ggplot(aes(crit_diff)) +
  geom_histogram() +
  theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# extract item parameters
# posterior draws (draws x item x parameter) from the hierarchical fit
item_pars_4pl <- coef(fit_4pl, summary = FALSE)$item
# locations
beta <- item_pars_4pl[, , "beta_Intercept"] %>%
posterior_summary() %>%
as_tibble() %>%
# NOTE(review): assumes rows come out in item order 1..12 so the
# generated row numbers can serve as item indices below -- verify
rownames_to_column()
# slopes
alpha <- item_pars_4pl[, , "logalpha_Intercept"] %>%
exp() %>%
# rescale slopes by the per-draw person SD (standardized theta scale)
sweep(1, person_sds_4pl, "*") %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column()
# guessing parameters
# back-transform from the logit scale to probabilities
gamma <- item_pars_4pl[, , "logitgamma_Intercept"] %>%
inv_logit_scaled() %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column()
# lapse parameters
psi <- item_pars_4pl[, , "logitpsi_Intercept"] %>%
inv_logit_scaled() %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column()
item_pars_4pl <- bind_rows(beta, alpha, gamma, psi, .id = "nlpar") %>%
rename(item = "rowname") %>%
mutate(item = as.numeric(item)) %>%
mutate(
nlpar = nlpar %>%
factor(labels = c("Location", "Slope", "Guessing", "Lapse")),
method = "MCMC-H"
)
saveRDS(item_pars_4pl, "results/item_pars_4pl")
# item-level posterior-predictive fit, one histogram per item
item_fit_4pl <- fit_statistic(
fit_4pl, criterion = ll, group = item,
nsamples = 1000
)
item_fit_4pl %>%
ggplot(aes(crit_diff)) +
geom_histogram() +
facet_wrap("item", scales = "free") +
theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

MCMC with non-hierarchical item priors
# Non-hierarchical 4PL: all item parameters (location, slope, guessing,
# lapse) as unpooled fixed effects; identity link because the formula
# already produces a probability via inv_logit().
formula_4pl_nh <- bf(
response2 ~ gamma + (1 - gamma - psi) *
inv_logit(beta + exp(logalpha) * theta),
nl = TRUE,
theta ~ 0 + (1 | person),
beta ~ 0 + item,
logalpha ~ 0 + item,
logitgamma ~ 0 + item,
nlf(gamma ~ inv_logit(logitgamma)),
logitpsi ~ 0 + item,
nlf(psi ~ inv_logit(logitpsi)),
family = brmsfamily("bernoulli", link = "identity")
)
# wider priors than the hierarchical model, since no pooling occurs
prior_4pl_nh <-
prior("normal(0, 5)", class = "b", nlpar = "beta") +
prior("normal(0, 1)", class = "b", nlpar = "logalpha") +
prior("normal(-2, 1)", class = "b", nlpar = "logitgamma") +
prior("normal(-2, 1)", class = "b", nlpar = "logitpsi") +
prior("normal(0, 1)", class = "sd", group = "person", nlpar = "theta")
# brm() reloads a cached fit from 'file' if it exists
fit_4pl_nh <- brm(
formula = formula_4pl_nh,
data = spm_long,
prior = prior_4pl_nh,
chains = cores, iter = iter, warmup = warmup,
cores = cores, control = control, inits = 0,
file = "models/fit_4pl_nh"
)
# Attach the LOO criterion (add_loo() is a helper defined upstream).
fit_4pl_nh <- add_loo(fit_4pl_nh)
## Warning: Found 291 observations with a pareto_k > 0.7 in model 'fit_4pl_nh'. With this many problematic observations, it may be more appropriate to use 'kfold'
## with argument 'K = 10' to perform 10-fold cross-validation rather than LOO.
## Automatically saving the model object in 'models/fit_4pl_nh.rds'
summary(fit_4pl_nh)
## Family: bernoulli
## Links: mu = identity
## Formula: response2 ~ gamma + (1 - gamma - psi) * inv_logit(beta + exp(logalpha) * theta)
## theta ~ 0 + (1 | person)
## beta ~ 0 + item
## logalpha ~ 0 + item
## logitgamma ~ 0 + item
## gamma ~ inv_logit(logitgamma)
## logitpsi ~ 0 + item
## psi ~ inv_logit(logitpsi)
## Data: spm_long (Number of observations: 5988)
## Samples: 4 chains, each with iter = 3000; warmup = 1000; thin = 1;
## total post-warmup samples = 8000
##
## Group-Level Effects:
## ~person (Number of levels: 499)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(theta_Intercept) 2.51 0.53 1.57 3.66 1.00 3901 4812
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## beta_item1 2.86 1.21 1.33 5.97 1.00 3885 2947
## beta_item2 4.70 1.02 3.27 7.23 1.00 5323 3737
## beta_item3 3.05 1.18 1.87 6.50 1.00 1705 1048
## beta_item4 6.10 2.39 3.16 12.18 1.00 1940 3780
## beta_item5 8.77 2.58 4.79 14.59 1.00 3486 5607
## beta_item6 4.33 1.65 2.29 8.62 1.00 2864 4268
## beta_item7 1.34 0.33 0.76 2.06 1.00 6321 5146
## beta_item8 0.58 0.30 -0.01 1.19 1.00 4586 5099
## beta_item9 -0.57 0.98 -3.16 0.55 1.00 2383 2594
## beta_item10 -0.64 0.34 -1.31 0.03 1.00 3796 4740
## beta_item11 -5.00 2.13 -10.31 -2.12 1.00 3877 4830
## beta_item12 -5.25 2.14 -10.46 -2.28 1.00 3695 5003
## logalpha_item1 -0.22 0.42 -1.00 0.67 1.00 3984 4121
## logalpha_item2 0.17 0.32 -0.43 0.84 1.00 4372 4422
## logalpha_item3 0.32 0.51 -0.46 1.54 1.00 1364 1176
## logalpha_item4 0.80 0.39 0.09 1.58 1.00 2529 4459
## logalpha_item5 1.13 0.35 0.45 1.82 1.00 3981 5524
## logalpha_item6 0.78 0.42 0.03 1.67 1.00 2984 3963
## logalpha_item7 -0.15 0.32 -0.72 0.52 1.00 3992 4842
## logalpha_item8 -0.11 0.33 -0.67 0.59 1.00 3997 4087
## logalpha_item9 0.49 0.62 -0.42 2.03 1.00 1988 2452
## logalpha_item10 0.18 0.36 -0.42 0.99 1.00 2855 2738
## logalpha_item11 1.25 0.45 0.41 2.16 1.00 3590 4658
## logalpha_item12 0.96 0.45 0.10 1.85 1.00 3998 5381
## logitgamma_item1 -2.29 0.88 -4.06 -0.65 1.00 10390 6011
## logitgamma_item2 -2.12 0.94 -3.98 -0.31 1.00 6934 4371
## logitgamma_item3 -1.54 0.91 -3.59 -0.22 1.00 1778 3833
## logitgamma_item4 -3.14 0.70 -4.65 -1.92 1.00 11207 6002
## logitgamma_item5 -2.99 0.69 -4.44 -1.75 1.00 8074 5706
## logitgamma_item6 -2.24 0.57 -3.57 -1.29 1.00 6385 5130
## logitgamma_item7 -2.17 0.81 -3.93 -0.86 1.00 7305 5079
## logitgamma_item8 -2.62 0.75 -4.24 -1.33 1.00 7618 5020
## logitgamma_item9 -1.20 0.42 -2.21 -0.61 1.00 3879 4804
## logitgamma_item10 -3.44 0.62 -4.78 -2.35 1.00 9807 6139
## logitgamma_item11 -2.05 0.22 -2.51 -1.65 1.00 8015 6160
## logitgamma_item12 -1.74 0.19 -2.13 -1.40 1.00 9098 5892
## logitpsi_item1 -2.08 0.44 -3.21 -1.48 1.00 4721 4608
## logitpsi_item2 -4.02 0.50 -5.10 -3.18 1.00 8222 5538
## logitpsi_item3 -3.35 0.43 -4.29 -2.61 1.00 9236 5683
## logitpsi_item4 -3.52 0.57 -4.84 -2.64 1.00 2322 4585
## logitpsi_item5 -3.84 0.44 -4.84 -3.14 1.00 5255 4971
## logitpsi_item6 -3.19 0.39 -4.04 -2.52 1.00 6314 5835
## logitpsi_item7 -3.24 0.58 -4.56 -2.26 1.00 9009 6492
## logitpsi_item8 -2.84 0.62 -4.24 -1.85 1.00 7113 6462
## logitpsi_item9 -2.91 0.62 -4.33 -1.92 1.00 5038 5822
## logitpsi_item10 -2.22 0.68 -3.85 -1.17 1.00 3539 4698
## logitpsi_item11 -2.61 0.66 -4.08 -1.55 1.00 2572 4372
## logitpsi_item12 -2.32 0.80 -4.07 -1.02 1.00 1676 3817
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Trace and density plots for convergence diagnostics
plot(fit_4pl_nh)










# Extract posterior draws of the person parameters theta from the
# non-hierarchical model; ranef(..., summary = FALSE) returns a
# draws x persons matrix for the "theta_Intercept" term.
person_pars_4pl_nh <-
ranef(fit_4pl_nh, summary = FALSE)$person[, , "theta_Intercept"]
# SD of theta across persons, computed separately within each posterior draw
person_sds_4pl_nh <- apply(person_pars_4pl_nh, 1, sd)
# Standardize theta within each draw (divide each draw's person parameters
# by that draw's SD) so estimates are comparable across models, then
# summarise the posterior per person.
person_pars_4pl_nh <- person_pars_4pl_nh %>%
sweep(1, person_sds_4pl_nh, "/") %>%
posterior_summary() %>%
as_tibble() %>%
# NOTE(review): this relies on rownames_to_column() yielding sequential
# indices "1".."N" after as_tibble() — confirm with the installed tibble
# version, otherwise as.numeric() below produces NAs.
rownames_to_column(var = "person") %>%
mutate(person = as.numeric(person))
saveRDS(person_pars_4pl_nh, "results/person_pars_4pl_nh")
# Caterpillar plot of the standardized person parameters: persons ordered
# by posterior mean, each shown with its 95% credible interval.
person_pars_4pl_nh %>%
arrange(Estimate) %>%
mutate(id2 = row_number()) %>%
ggplot(aes(x = id2, y = Estimate, ymin = Q2.5, ymax = Q97.5)) +
geom_pointrange(alpha = 0.7) +
coord_flip() +
theme(
axis.ticks.y = element_blank(),
axis.text.y = element_blank()
) +
labs(x = "Person Number (sorted after Estimate)")

# Posterior predictive person-fit check: fit_statistic() (a helper defined
# earlier in the file) evaluates the log-likelihood criterion (ll) per
# person over posterior draws.
person_fit_4pl_nh <- fit_statistic(
fit_4pl_nh, criterion = ll, group = person,
nsamples = 1000
)
# Posterior probability that the criterion difference is positive;
# values near 0 or 1 flag potential person misfit.
person_diff_4pl_nh <- person_fit_4pl_nh %>%
group_by(person) %>%
summarise(bp = mean(crit_diff > 0))
# Inspect the person with the largest misfit probability.
# NOTE(review): which.max() gives the row index of person_diff_4pl_nh,
# which matches the person ID only if persons are numbered 1..N in
# ascending order — confirm against the grouping above.
person_max_4pl_nh <- which.max(person_diff_4pl_nh$bp)
person_fit_4pl_nh %>%
filter(person == person_max_4pl_nh) %>%
ggplot(aes(crit_diff)) +
geom_histogram() +
theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Extract posterior draws of all item parameters from the
# non-hierarchical model (a draws x parameters matrix).
item_pars_4pl_nh <- brms::fixef(fit_4pl_nh, summary = FALSE)
# Helper: select the draw columns whose names match `pattern`, apply
# `transform` to the draws, and summarise the posterior per item.
# (Factors out the four copy-pasted pipelines the block previously had.)
# NOTE(review): relies on rownames_to_column() yielding sequential item
# indices "1".."12" after as_tibble() — same assumption as for the
# person parameters above.
summarise_item_pars <- function(draws, pattern, transform = identity) {
draws[, grepl(pattern, colnames(draws))] %>%
transform() %>%
posterior_summary() %>%
as_tibble() %>%
rownames_to_column()
}
# locations (easiness parameters)
beta <- summarise_item_pars(item_pars_4pl_nh, "^beta")
# slopes: back-transform from the log scale and rescale by the per-draw
# SD of the person parameters to match the standardized theta scale
alpha <- summarise_item_pars(
item_pars_4pl_nh, "^logalpha",
function(x) sweep(exp(x), 1, person_sds_4pl_nh, "*")
)
# guessing parameters: back-transform from the logit scale
gamma <- summarise_item_pars(item_pars_4pl_nh, "^logitgamma", inv_logit_scaled)
# lapse parameters: back-transform from the logit scale
psi <- summarise_item_pars(item_pars_4pl_nh, "^logitpsi", inv_logit_scaled)
# Combine all parameter types; .id = "nlpar" records which block each row
# came from. item - 0.1 offsets the points slightly so the methods do not
# overlap in the joint item-parameter plot further below.
item_pars_4pl_nh <- bind_rows(beta, alpha, gamma, psi, .id = "nlpar") %>%
rename(item = "rowname") %>%
mutate(
item = as.numeric(item),
item = item - 0.1,
nlpar = nlpar %>%
factor(labels = c("Location", "Slope", "Guessing", "Lapse")),
method = "MCMC-NH"
)
saveRDS(item_pars_4pl_nh, "results/item_pars_4pl_nh")
# Posterior predictive item-fit check: distribution of the log-likelihood
# criterion difference, one histogram panel per item.
item_fit_4pl_nh <- fit_statistic(
fit_4pl_nh,
criterion = ll,
group = item,
nsamples = 1000
)
item_fit_4pl_nh %>%
ggplot(aes(x = crit_diff)) +
geom_histogram() +
facet_wrap(~ item, scales = "free") +
theme_hist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

MAP (maximum a posteriori) estimation via mirt
# Priors for MAP estimation with mirt. Each parprior entry is
# c(parameter numbers, prior type, hyperparameter 1, hyperparameter 2);
# with 4 estimated parameters per item the arithmetic sequences pick out
# the same parameter type across all 12 items. The priors mirror the
# Bayesian model: lognormal on slopes, normal on locations, and normals
# on the (logit-scale) lower and upper asymptotes.
# NOTE(review): c() coerces all entries to character here; presumably mirt
# converts them back internally — confirm with the installed mirt version.
parprior_4pl_map <- list(
c(seq(1, 45, 4), "lnorm", 0, 2),
c(seq(2, 46, 4), "norm", 0, 5),
c(seq(3, 47, 4), "norm", -2, 1),
c(seq(4, 48, 4), "norm", 2, 1)
)
# Unidimensional 4PL model with standard errors; NCYCLES raised so the
# EM algorithm has enough iterations to converge.
fit_4pl_map <- mirt(
spm_wide, model = 1,
itemtype = "4PL", SE = TRUE,
technical = list(NCYCLES = 10000),
parprior = parprior_4pl_map
)
# Person parameter estimates (factor scores) from the MAP fit.
person_pars_4pl_map <- fscores(fit_4pl_map, full.scores.SE = TRUE) %>%
as.data.frame() %>%
rename(Estimate = "F1", Est.Error = "SE_F1") %>%
mutate(
# person parameters are not scaled appropriately by mirt for the 4pl model:
# rescale so their SD matches the hierarchical MCMC estimates
# (person_pars_4pl, computed earlier in the file)
scale = sd(Estimate) / sd(person_pars_4pl$Estimate),
Estimate = Estimate / scale,
Est.Error = Est.Error / scale,
# approximate 95% intervals from the (rescaled) standard errors
Q2.5 = Estimate - 1.96 * Est.Error,
Q97.5 = Estimate + 1.96 * Est.Error,
person = seq_len(n())
) %>%
select(-scale)
saveRDS(person_pars_4pl_map, "results/person_pars_4pl_map")
# Item parameter estimates from the MAP fit. mirt reports the upper
# asymptote u; the lapse probability is 1 - u, so point estimate and
# confidence bounds must both be inverted. Inverting also SWAPS which
# bound is lower and which is upper — the original code applied 1 - x to
# each bound in place, leaving Q2.5 > Q97.5 for every Lapse row; fixed
# here by exchanging the bounds for the Lapse parameters.
item_pars_4pl_map <- coef(fit_4pl_map, as.data.frame = TRUE) %>%
as.data.frame() %>%
rownames_to_column() %>%
# keep slope (a1), location (d), guessing (g), and upper asymptote (u)
filter(str_detect(rowname, "\\.(a1|d|g|u)$")) %>%
mutate(
# item number from row names of the form "X<item>.<par>"
item = str_match(rowname, "^X[[:digit:]]+") %>%
str_remove("^X") %>%
as.numeric(),
# offset so that methods do not overlap in the joint item plot
item = item - 0.2,
nlpar = str_match(rowname, "\\.[^\\.]+$") %>%
str_remove("^\\.") %>%
factor(
levels = c("d", "a1", "g", "u"),
labels = c("Location", "Slope", "Guessing", "Lapse")
),
# invert upper asymptote to obtain the lapse probability; compute both
# bounds from the untransformed CI columns before overwriting them
lower = ifelse(nlpar == "Lapse", 1 - CI_97.5, CI_2.5),
upper = ifelse(nlpar == "Lapse", 1 - CI_2.5, CI_97.5),
par = ifelse(nlpar == "Lapse", 1 - par, par),
CI_2.5 = lower,
CI_97.5 = upper,
method = "MAP"
) %>%
select(-lower, -upper) %>%
rename(Estimate = par, Q2.5 = CI_2.5, Q97.5 = CI_97.5)
saveRDS(item_pars_4pl_map, "results/item_pars_4pl_map")
ML (maximum likelihood) estimation via mirt
# Plain ML fit with mirt (no priors). The unconstrained 4PL model is only
# weakly identified, hence the information-matrix warning below and the
# missing standard errors for the item parameters.
fit_4pl_ml <- mirt(
spm_wide, model = 1, itemtype = "4PL", SE = TRUE,
technical = list(NCYCLES = 10000)
)
## Warning: Could not invert information matrix; model likely is not empirically identified.
# Person parameter estimates (factor scores) from the ML fit.
person_pars_4pl_ml <- fscores(fit_4pl_ml, full.scores.SE = TRUE) %>%
as.data.frame() %>%
rename(Estimate = "F1", Est.Error = "SE_F1") %>%
mutate(
# person parameters are not scaled appropriately by mirt for the 4pl model:
# rescale so their SD matches the hierarchical MCMC estimates
# (person_pars_4pl, computed earlier in the file)
scale = sd(Estimate) / sd(person_pars_4pl$Estimate),
Estimate = Estimate / scale,
Est.Error = Est.Error / scale,
# approximate 95% intervals from the (rescaled) standard errors
Q2.5 = Estimate - 1.96 * Est.Error,
Q97.5 = Estimate + 1.96 * Est.Error,
person = seq_len(n())
) %>%
select(-scale)
saveRDS(person_pars_4pl_ml, "results/person_pars_4pl_ml")
# Item parameter estimates from the ML fit.
item_pars_4pl_ml <- coef(fit_4pl_ml, as.data.frame = TRUE) %>%
as.data.frame() %>%
rownames_to_column() %>%
# keep slope (a1), location (d), guessing (g), and upper asymptote (u)
filter(str_detect(rowname, "\\.(a1|d|g|u)$")) %>%
mutate(
# item number from row names of the form "X<item>.<par>"
item = str_match(rowname, "^X[[:digit:]]+") %>%
str_remove("^X") %>%
as.numeric(),
# offset so that methods do not overlap in the joint item plot
item = item - 0.3,
nlpar = str_match(rowname, "\\.[^\\.]+$") %>%
str_remove("^\\.") %>%
factor(
levels = c("d", "a1", "g", "u"),
labels = c("Location", "Slope", "Guessing", "Lapse")
),
# invert the upper asymptote u to obtain the lapse probability (1 - u)
par = ifelse(nlpar == "Lapse", 1 - par, par),
# inverting the information matrix failed, so no standard errors are
# available; use the point estimate (already inverted for Lapse, since
# mutate() evaluates its expressions sequentially) as both bounds
CI_2.5 = par,
CI_97.5 = par,
method = "ML"
) %>%
rename(Estimate = par, Q2.5 = CI_2.5, Q97.5 = CI_97.5)
saveRDS(item_pars_4pl_ml, "results/item_pars_4pl_ml")
Joint plots: Person parameters
# plot comparison of person parameters
person_pars_4pl_all <- person_pars_4pl %>%
bind_cols(person_pars_4pl_ml) %>%
mutate(
UIW = Q97.5 - Q2.5,
UIW1 = Q97.51 - Q2.51
)
person_pars_4pl_all %>%
ggplot(aes(Estimate, Estimate1, color = UIW)) +
geom_point() +
geom_abline() +
scale_color_viridis_c() +
lims(x = c(-3, 2), y = c(-3, 2)) +
labs(
x = "Estimate (MCMC-H)",
y = "Estimate (ML)",
color = "UIW (MCMC-H)"
)

# Compare the uncertainty interval widths of the MCMC-H and ML person
# parameter estimates, colored by the MCMC-H point estimate.
person_pars_4pl_all %>%
ggplot(aes(x = UIW, y = UIW1, color = Estimate)) +
geom_point() +
geom_abline() +
scale_color_viridis_c() +
xlim(1, 2.4) +
ylim(1, 2.4) +
labs(
color = "Estimate (MCMC-H)",
x = "UIW (MCMC-H)",
y = "UIW (ML)"
)
## Warning: Removed 65 rows containing missing values (geom_point).

Joint plots: Item parameters
# Joint caterpillar plot of the item parameters from all four estimation
# methods, one panel per parameter type.
bind_rows(
item_pars_4pl,
item_pars_4pl_nh,
item_pars_4pl_map,
item_pars_4pl_ml
) %>%
mutate(
# fix the display order of the estimation methods
method = factor(method, levels = c("MCMC-H", "MCMC-NH", "MAP", "ML"))
) %>%
ggplot(aes(x = item, y = Estimate, ymin = Q2.5, ymax = Q97.5, color = method)) +
geom_pointrange() +
scale_x_continuous(breaks = 1:12) +
facet_wrap(~ nlpar, scales = "free_x") +
scale_color_viridis_d(name = "Method") +
coord_flip() +
labs(x = "Item Number")
